From b1c23c62cac2f62b0bf7e76582c28693622b45ab Mon Sep 17 00:00:00 2001 From: zli Date: Wed, 18 Oct 2023 15:04:01 -0400 Subject: [PATCH 1/2] updated doc format for Running ASTRA-sim --- pages/docs/Argument ${NETWORK_CONFIG}.rst | 41 ++++ pages/docs/Argument ${SYSTEM_CONFIG}.rst | 66 ++++++ pages/docs/Argument ${WORKLOAD_CONFIG}.rst | 85 ++++++++ pages/docs/Running ASTRA-sim.rst | 199 +----------------- pages/docs/_build/html/ASTRA-sim Output.html | 4 +- .../html/Argument ${NETWORK_CONFIG}.html | 140 ++++++++++++ .../html/Argument ${SYSTEM_CONFIG}.html | 166 +++++++++++++++ .../html/Argument ${WORKLOAD_CONFIG}.html | 182 ++++++++++++++++ pages/docs/_build/html/Getting Started.html | 12 +- pages/docs/_build/html/Running ASTRA-sim.html | 174 ++------------- .../Argument ${NETWORK_CONFIG}.rst.txt | 41 ++++ .../Argument ${SYSTEM_CONFIG}.rst.txt | 66 ++++++ .../Argument ${WORKLOAD_CONFIG}.rst.txt | 85 ++++++++ .../html/_sources/Running ASTRA-sim.rst.txt | 199 +----------------- pages/docs/_build/html/index.html | 12 +- pages/docs/_build/html/objects.inv | Bin 361 -> 488 bytes pages/docs/_build/html/searchindex.js | 2 +- 17 files changed, 914 insertions(+), 560 deletions(-) create mode 100644 pages/docs/Argument ${NETWORK_CONFIG}.rst create mode 100644 pages/docs/Argument ${SYSTEM_CONFIG}.rst create mode 100644 pages/docs/Argument ${WORKLOAD_CONFIG}.rst create mode 100644 pages/docs/_build/html/Argument ${NETWORK_CONFIG}.html create mode 100644 pages/docs/_build/html/Argument ${SYSTEM_CONFIG}.html create mode 100644 pages/docs/_build/html/Argument ${WORKLOAD_CONFIG}.html create mode 100644 pages/docs/_build/html/_sources/Argument ${NETWORK_CONFIG}.rst.txt create mode 100644 pages/docs/_build/html/_sources/Argument ${SYSTEM_CONFIG}.rst.txt create mode 100644 pages/docs/_build/html/_sources/Argument ${WORKLOAD_CONFIG}.rst.txt diff --git a/pages/docs/Argument ${NETWORK_CONFIG}.rst b/pages/docs/Argument ${NETWORK_CONFIG}.rst new file mode 100644 index 0000000..ca2c40a --- 
/dev/null +++ b/pages/docs/Argument ${NETWORK_CONFIG}.rst @@ -0,0 +1,41 @@ +Argument ${NETWORK_CONFIG} +========================== + +.. code-block:: console + + --network-configuration: path to the network configuration file + +Example network configurations can be found at + +.. code-block:: console + + ${ASTRA_SIM}/inputs/network/ + +* **topology-name**: (string) put "Hierarchical" + +* **dimensions-count**: (uint) number of network dimensions + +.. note:: + Each configuration below is represented as an array of size **dimensions-count**, indexed by the dimension level. + +* **topologies-per-dim**: (string) network topology ("Ring", "FullyConnected", or "Switch") + +* **dimension-type**: (string) dimension type ("Tile", "Package", "Node", or "Pod") + +* **units-count**: (uint) number of GPUs + +* **links-count**: (uint) number of links + +* **link-latency**: (uint) link latency (ns) + +* **link-bandwidth**: (uint) link bandwidth (GB/s or B/ns) + +* **nic-latency**: (uint) nic latency (ns) + +* **router-latency**: (uint) router latency (ns) + +* **hbm-latency**: (uint) memory latency (ns) + +* **hbm-bandwidth**: (uint) memory bandwidth (GB/s or B/ns) + +* **hbm-scale**: (uint) memory scaling factor diff --git a/pages/docs/Argument ${SYSTEM_CONFIG}.rst b/pages/docs/Argument ${SYSTEM_CONFIG}.rst new file mode 100644 index 0000000..0ed5474 --- /dev/null +++ b/pages/docs/Argument ${SYSTEM_CONFIG}.rst @@ -0,0 +1,66 @@ +Argument ${SYSTEM_CONFIG} +========================= + +.. code-block:: console + + --system-configuration: path to the system configuration file + +Example system configurations can be found at: + +.. code-block:: console + + ${ASTRA_SIM}/inputs/system/ + +* **scheduling-policy**: (LIFO/FIFO) + + The order in which we prioritize collectives based on their time of arrival. LIFO means that the most recently created collectives have higher priority, while FIFO is the reverse. 
+ +* **intra-dimension-scheduling**: (FIFO/SCF) + + The order in which we prioritize collective chunks inside each dimension. FIFO means that the least recently created collectives have higher priority. SCF means that the smallest chunk has higher priority. + +* **inter-dimension-scheduling**: (baseline/themis) + + The order in which we prioritize collective chunks across multiple dimensions. baseline means that the scheduling always uses a constant schedule for all chunks. themis means that the scheduling issues chunks to the dimension with the least load. + +* **endpoint-delay**: (int) + + The time, in cycles, that an NPU spends processing a message after receiving it. + +* **active-chunks-per-dimension**: (int) + + This corresponds to the maximum number of chunks we would like to execute in parallel on + each logical dimension of the topology. + +* **preferred-dataset-splits**: (int) + + The number of chunks we divide each collective into. + +* **all-reduce-implementation**: (Dimension0Collective_Dimension1Collective_xxx_DimensionNCollective) + + Here we can create a multiphase collective all-reduce algorithm and directly specify the collective algorithm type for each logical dimension. The available options (algorithms) are: ring, direct, doubleBinaryTree, oneRing, oneDirect. + + For example, "ring_doubleBinaryTree" means we create a logical topology with 2 dimensions and we perform the ring algorithm on the first dimension followed by double binary tree on the second dimension for the all-reduce pattern. Hence the number of physical dimensions should be equal to the number of logical dimensions. The only exceptions are oneRing/oneDirect where we assume that, no matter how many physical dimensions we have, we create one big logical ring/direct (AllToAll) topology where all NPUs are connected and perform a one-phase ring/direct algorithm. + +.. note:: + + oneRing and oneDirect are not available for Garnet Backend in this version. 
+ +* **reduce-scatter-implementation**: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg) + + The same as "all-reduce-implementation:" but for reduce-scatter collective. The available options are: ring, direct, oneRing, oneDirect. + +* **all-gather-implementation**: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg) + + The same as "all-reduce-implementation:" but for all-gather collective. The available options (algorithms) are: ring, direct, oneRing, oneDirect. + +* **all-to-all-implementation**: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg) + + The same as "all-reduce-implementation:" but for all-to-all collective. The available options (algorithms) are: ring, direct, oneRing, oneDirect. + +* **collective-optimization**: (baseline/localBWAware) + + baseline issues allreduce across all dimensions to handle allreduce of a single chunk. For an N-dimensional network, localBWAware issues a series of reduce-scatters on all dimensions from dim1 to dimN-1, followed by all-reduce on dimN, and then a series of all-gathers starting from dimN-1 to dim1. This optimization is used to reduce the chunk size as it goes to the next network dimensions. + +.. note:: + The default clock cycle period is 1ns (1 GHz frequency). This value is defined inside Sys.hh. One can change it to any number. It will be a configurable command line parameter in the later versions. diff --git a/pages/docs/Argument ${WORKLOAD_CONFIG}.rst b/pages/docs/Argument ${WORKLOAD_CONFIG}.rst new file mode 100644 index 0000000..37b88fc --- /dev/null +++ b/pages/docs/Argument ${WORKLOAD_CONFIG}.rst @@ -0,0 +1,85 @@ +Argument ${WORKLOAD_CONFIG} +=========================== + +.. code-block:: console + + --workload-configuration: path to the prefix of execution trace files + +The naming rule for execution traces follows the format {path_prefix}.{npu_id}.eg. + +.. note:: + Execution traces can be created using Chakra tools. 
You have the option of using either execution trace generator (:ref:`et_generator `) or execution trace converter (:ref:`et_converter `). + +.. _et_generator: + +Using Execution Trace Generator (et_generator) +---------------------------------------------- + +et_generator can be used to define and generate any execution traces, functioning as a test case generator. You can generate execution traces with the following commands: + +.. code-block:: console + + $ cd ${ASTRA_SIM}/extern/graph_frontend/chakra/et_generator + $ cmake CMakeLists.txt && make -j$(nproc) + $ ./et_generator --num_npus 64 --num_dims 1 + +To run one of the example traces (twoCompNodesDependent), execute the following command: + +.. code-block:: console + + $ cd - + $ ./build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra \ + --workload-configuration=./extern/graph_frontend/chakra/et_generator/twoCompNodesDependent \ + --system-configuration=./inputs/system/sample_fully_connected_sys.txt \ + --network-configuration=./inputs/network/analytical/fully_connected.json \ + --memory-configuration=./inputs/memory/analytical/no_memory_expansion.json + +Upon completion, ASTRA-sim will display the number of cycles it took to run the simulation. + +.. code-block:: console + + sys[0] finished, 10 cycles + sys[1] finished, 10 cycles + ... + sys[62] finished, 10 cycles + sys[63] finished, 10 cycles + +.. _et_converter: + +Using Execution Trace Converter (et_converter) +---------------------------------------------- + +et_converter is a trace schema conversion tool, supporting PyTorch and FlexFlow execution traces, as well as ASTRA-sim 1.0 input files. You can convert ASTRA-sim 1.0 text input files into Chakra traces with the following commands: + +.. 
code-block:: console + + $ cd ${ASTRA_SIM}/extern/graph_frontend/chakra/ + $ python3 setup.py install --user + $ python3 -m et_converter.et_converter \ + --input_type Text \ + --input_filename ../../../inputs/workload/ASTRA-sim-1.0/Resnet50_DataParallel.txt \ + --output_filename ../../../inputs/workload/ASTRA-sim-2.0/Resnet50_DataParallel \ + --num_npus 64 \ + --num_dims 1 \ + --num_passes 1 + +Run the following command: + +.. code-block:: console + + $ cd - + $ ./build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra \ + --workload-configuration=./inputs/workload/ASTRA-sim-2.0/Resnet50_DataParallel \ + --system-configuration=./inputs/system/sample_fully_connected_sys.txt \ + --network-configuration=./inputs/network/analytical/fully_connected.json \ + --memory-configuration=./inputs/memory/analytical/no_memory_expansion.json + +Upon completion, ASTRA-sim will display the number of cycles it took to run the simulation. + +.. code-block:: console + + sys[62] finished, 187442108 cycles + sys[61] finished, 187442108 cycles + ... + sys[0] finished, 187442108 cycles + sys[63] finished, 187442108 cycles diff --git a/pages/docs/Running ASTRA-sim.rst b/pages/docs/Running ASTRA-sim.rst index 45aadb1..b1d2748 100644 --- a/pages/docs/Running ASTRA-sim.rst +++ b/pages/docs/Running ASTRA-sim.rst @@ -6,7 +6,7 @@ Once ASTRA-sim is built, the executable ${BINARY} is located at: .. code-block:: console # For the analytical network backend - $ {ASTRA_SIM}/build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra + $ ${ASTRA_SIM}/build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra Conduct experiments by passing the required aruguments: @@ -18,198 +18,11 @@ Conduct experiments by passing the required aruguments: --network-configuration=${NETWORK_CONFIG} \ --memory-configuration=${ASTRA_SIM}/inputs/memory/analytical/no_memory_expansion.json -.. note:: - Additional arguments may be required based on the type of network backend. 
- -Argument ${WORKLOAD_CONFIG} ---------------------------- - -.. code-block:: console +.. toctree:: - --workload-configuration: path to the prefix of execution trace files - -The naming rule for execution traces follows the format {path_prefix}.{npu_id}.eg. + Argument ${WORKLOAD_CONFIG} + Argument ${SYSTEM_CONFIG} + Argument ${NETWORK_CONFIG} .. note:: - Execution traces can be created using Chakra tools. You have the option of using either execution trace generator (et_generator) or execution trace converter (et_converter). - -Argument ${SYSTEM_CONFIG} -------------------------- - -.. code-block:: console - - --system-configuration: path to the system configuration file - -Example system configurations can be found at: - -.. code-block:: console - - ${ASTRA_SIM}/inputs/system/ - -* **scheduling-policy**: (LIFO/FIFO) - - The order we proritize collectives according based on their time of arrival. LIFO means that most recently created collectives have higher priority. While FIFO is the reverse. - -* **intra-dimension-scheduling**: (FIFO/SCF) - - The order we proritize collective chunks inside each dimension. FIFO means that the least recently created collectives have higher priority. SCF means that the smallest chunk have higher priority. - -* **inter-dimension-scheduling**: (baseline/themis) - - The order we proritize collective chunks across multiple dimensions. baseline means that the scheduling always uses a constant schedule for all chunks. themis means that the scheduling issues chunks to the dimension with least load. - -* **endpoint-delay**: (int) - - The time NPU spends processing a message after receiving it in terms of cycles. - -* **active-chunks-per-dimension**: (int) - - This corresponds to the Maximum number of chunks we like execute in parallel on - each logical dimesnion of topology. - -* **preferred-dataset-splits**: (int) - - The number of chunks we divide each collective into. 
- -* **all-reduce-implementation**: (Dimension0Collective_Dimension1Collective_xxx_DimensionNCollective) - - Here we can create a multiphase colective all-reduce algorithm and directly specify the collective algorithm type for each logical dimension. The available options (algorithms) are: ring, direct, doubleBinaryTree, oneRing, oneDirect. - - For example, "ring_doubleBinaryTree" means we create a logical topology with 2 dimensions and we perform ring algorithm on the first dimension followed by double binary tree on the second dimension for the all-reduce pattern. Hence the number of physical dimension should be equal to the number of logical dimensions. The only exceptions are oneRing/oneDirect where we assume no matter how many physical dimensions we have, we create a one big logical ring/direct(AllToAll) topology where all NPUs are connected and perfrom a one phase ring/direct algorithm. - -.. note:: - - oneRing and oneDirect is not available for Garnet Backend in this version. - -* **reduce-scatter-implementation**: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg) - - The same as "all-reduce-implementation:" but for reduce-scatter collective. The available options are: ring, direct, oneRing, oneDirect. - -* **all-gather-implementation**: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg) - - The same as "all-reduce-implementation:" but for all-gather collective. The available options (algorithms) are: ring, direct, oneRing, oneDirect. - -* **all-to-all-implementation**: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg) - - The same as "all-reduce-implementation:" but for all-to-all collective. The available options (algorithms) are: ring, direct, oneRing, oneDirect. - -* **collective-optimization**: (baseline/localBWAware) - - baseline issues allreduce across all dimensions to handle allreduce of single chunk. 
While for an N-dimensional network, localBWAware issues a series of reduce-scatters on all dimensions from dim1 to dimN-1, followed by all-reduce on dimN, and then series of all-gathers starting from dimN-1 to dim1. This optimization is used to reduce the chunk size as it goes to the next network dimensions. - -.. note:: - The default clock cycle period is 1ns (1 Ghz feq). This value is defined inside Sys.hh. One can change it to any number. It will be a configurable command line parameter in the later versions. - -Argument ${NETWORK_CONFIG} --------------------------- - -.. code-block:: console - - --network-configuration: path to the network configuration file - -Example network configurations can be found at - -.. code-block:: console - - ${ASTRA_SIM}/inputs/network/ - -* **topology-name**: (string) put "Hierarchical" - -* **dimensions-count**: (uint) number of network dimensions - -.. note:: - Each configurations below is represented as an array of size **dimensions-count**, indexed by the dimension level. - -* **topologies-per-dim**: (string) network topology ("Ring", "FullyConnected", or "Switch") - -* **dimension-type**: (string) dimension type ("Tile", "Package", "Node", or "Pod") - -* **units-count**: (uint) number of GPUs - -* **links-count**: (uint) number of links - -* **link-latency**: (uint) link latency (ns) - -* **link-bandwidth**: (uint) link bandwidth (GB/s or B/ns) - -* **nic-latency**: (uint) nic latency (ns) - -* **router-latency**: (uint) router latency (ns) - -* **hbm-latency**: (uint) memory latency (ns) - -* **hbm-bandwidth**: (uint) memory bandwidth (GB/s or B/ns) - -* **hbm-scale**: (uint) memory scaling factor - -Using Execution Trace Generator (et_generator) ----------------------------------------------- - -et_generator can be used to define and generate any execution traces, functioning as a test case generator. You can generate execution traces with the following commands: - -.. 
code-block:: console - - $ cd {ASTRA_SIM}/extern/graph_frontend/chakra/et_generator - $ cmake CMakeLists.txt && make -j$(nproc) - $ ./et_generator --num_npus 64 --num_dims 1 - -To run one of the example traces (twoCompNodesDependent), execute the following command: - -.. code-block:: console - - $ cd - - $ ./build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra \ - --workload-configuration=./extern/graph_frontend/chakra/et_generator/twoCompNodesDependent \ - --system-configuration=./inputs/system/sample_fully_connected_sys.txt \ - --network-configuration=./inputs/network/analytical/fully_connected.json \ - --memory-configuration=./inputs/memory/analytical/no_memory_expansion.json - -Upon completion, ASTRA-sim will display the number of cycles it took to run the simulation. - -.. code-block:: console - - sys[0] finished, 10 cycles - sys[1] finished, 10 cycles - ... - sys[62] finished, 10 cycles - sys[63] finished, 10 cycles - -Using Execution Trace Converter (et_converter) ----------------------------------------------- - -et_converter is a trace schema conversion tool, supporting PyTorch and FlexFlow execution traces, as well as ASTRA-sim 1.0 input files. You can convert ASTRA-sim 1.0 text input files into Chakra traces with the following commands: - -.. code-block:: console - - $ cd {ASTRA_SIM}/extern/graph_frontend/chakra/ - $ python3 setup.py install --user - $ python3 -m et_converter.et_converter \ - --input_type Text \ - --input_filename ../../../inputs/workload/ASTRA-sim-1.0/Resnet50_DataParallel.txt \ - --output_filename ../../../inputs/workload/ASTRA-sim-2.0/Resnet50_DataParallel \ - --num_npus 64 \ - --num_dims 1 \ - --num_passes 1 - -Run the following command: - -.. 
code-block:: console - - $ cd - - $ ./build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra \ - --workload-configuration=./inputs/workload/ASTRA-sim-2.0/Resnet50_DataParallel \ - --system-configuration=./inputs/system/sample_fully_connected_sys.txt \ - --network-configuration=./inputs/network/analytical/fully_connected.json \ - --memory-configuration=./inputs/memory/analytical/no_memory_expansion.json - -Upon completion, ASTRA-sim will display the number of cycles it took to run the simulation. - -.. code-block:: console - - sys[62] finished, 187442108 cycles - sys[61] finished, 187442108 cycles - ... - sys[0] finished, 187442108 cycles - sys[63] finished, 187442108 cycles - - + Additional arguments may be required based on the type of network backend. diff --git a/pages/docs/_build/html/ASTRA-sim Output.html b/pages/docs/_build/html/ASTRA-sim Output.html index f15595b..d37fb86 100644 --- a/pages/docs/_build/html/ASTRA-sim Output.html +++ b/pages/docs/_build/html/ASTRA-sim Output.html @@ -15,7 +15,7 @@ - + @@ -89,7 +89,7 @@

ASTRA-sim Output - +
diff --git a/pages/docs/_build/html/Argument ${NETWORK_CONFIG}.html b/pages/docs/_build/html/Argument ${NETWORK_CONFIG}.html new file mode 100644 index 0000000..f975821 --- /dev/null +++ b/pages/docs/_build/html/Argument ${NETWORK_CONFIG}.html @@ -0,0 +1,140 @@ + + + + + + + Argument ${NETWORK_CONFIG} — ASTRA-sim 1.0.0 documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Argument ${NETWORK_CONFIG}

+
--network-configuration: path to the network configuration file
+
+
+

Example network configurations can be found at

+
${ASTRA_SIM}/inputs/network/
+
+
+
    +
  • topology-name: (string) put “Hierarchical”

  • +
  • dimensions-count: (uint) number of network dimensions

  • +
+
+

Note

+

Each configuration below is represented as an array of size dimensions-count, indexed by the dimension level.

+
+
    +
  • topologies-per-dim: (string) network topology (“Ring”, “FullyConnected”, or “Switch”)

  • +
  • dimension-type: (string) dimension type (“Tile”, “Package”, “Node”, or “Pod”)

  • +
  • units-count: (uint) number of GPUs

  • +
  • links-count: (uint) number of links

  • +
  • link-latency: (uint) link latency (ns)

  • +
  • link-bandwidth: (uint) link bandwidth (GB/s or B/ns)

  • +
  • nic-latency: (uint) nic latency (ns)

  • +
  • router-latency: (uint) router latency (ns)

  • +
  • hbm-latency: (uint) memory latency (ns)

  • +
  • hbm-bandwidth: (uint) memory bandwidth (GB/s or B/ns)

  • +
  • hbm-scale: (uint) memory scaling factor

  • +
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/pages/docs/_build/html/Argument ${SYSTEM_CONFIG}.html b/pages/docs/_build/html/Argument ${SYSTEM_CONFIG}.html new file mode 100644 index 0000000..a762cb3 --- /dev/null +++ b/pages/docs/_build/html/Argument ${SYSTEM_CONFIG}.html @@ -0,0 +1,166 @@ + + + + + + + Argument ${SYSTEM_CONFIG} — ASTRA-sim 1.0.0 documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Argument ${SYSTEM_CONFIG}

+
--system-configuration: path to the system configuration file
+
+
+

Example system configurations can be found at:

+
${ASTRA_SIM}/inputs/system/
+
+
+
    +
  • scheduling-policy: (LIFO/FIFO)

    +

    The order in which we prioritize collectives based on their time of arrival. LIFO means that the most recently created collectives have higher priority, while FIFO is the reverse.

    +
  • +
  • intra-dimension-scheduling: (FIFO/SCF)

    +

    The order in which we prioritize collective chunks inside each dimension. FIFO means that the least recently created collectives have higher priority. SCF means that the smallest chunk has higher priority.

    +
  • +
  • inter-dimension-scheduling: (baseline/themis)

    +

    The order in which we prioritize collective chunks across multiple dimensions. baseline means that the scheduling always uses a constant schedule for all chunks. themis means that the scheduling issues chunks to the dimension with the least load.

    +
  • +
  • endpoint-delay: (int)

    +

    The time, in cycles, that an NPU spends processing a message after receiving it.

    +
  • +
  • active-chunks-per-dimension: (int)

    +

    This corresponds to the maximum number of chunks we would like to execute in parallel on +each logical dimension of the topology.

    +
  • +
  • preferred-dataset-splits: (int)

    +

    The number of chunks we divide each collective into.

    +
  • +
  • all-reduce-implementation: (Dimension0Collective_Dimension1Collective_xxx_DimensionNCollective)

    +

    Here we can create a multiphase collective all-reduce algorithm and directly specify the collective algorithm type for each logical dimension. The available options (algorithms) are: ring, direct, doubleBinaryTree, oneRing, oneDirect.

    +

    For example, “ring_doubleBinaryTree” means we create a logical topology with 2 dimensions and we perform the ring algorithm on the first dimension followed by double binary tree on the second dimension for the all-reduce pattern. Hence the number of physical dimensions should be equal to the number of logical dimensions. The only exceptions are oneRing/oneDirect where we assume that, no matter how many physical dimensions we have, we create one big logical ring/direct (AllToAll) topology where all NPUs are connected and perform a one-phase ring/direct algorithm.

    +
  • +
+
+

Note

+

oneRing and oneDirect are not available for Garnet Backend in this version.

+
+
    +
  • reduce-scatter-implementation: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg)

    +

    The same as “all-reduce-implementation:” but for reduce-scatter collective. The available options are: ring, direct, oneRing, oneDirect.

    +
  • +
  • all-gather-implementation: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg)

    +

    The same as “all-reduce-implementation:” but for all-gather collective. The available options (algorithms) are: ring, direct, oneRing, oneDirect.

    +
  • +
  • all-to-all-implementation: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg)

    +

    The same as “all-reduce-implementation:” but for all-to-all collective. The available options (algorithms) are: ring, direct, oneRing, oneDirect.

    +
  • +
  • collective-optimization: (baseline/localBWAware)

    +

    baseline issues allreduce across all dimensions to handle allreduce of a single chunk. For an N-dimensional network, localBWAware issues a series of reduce-scatters on all dimensions from dim1 to dimN-1, followed by all-reduce on dimN, and then a series of all-gathers starting from dimN-1 to dim1. This optimization is used to reduce the chunk size as it goes to the next network dimensions.

    +
  • +
+
+

Note

+

The default clock cycle period is 1ns (1 GHz frequency). This value is defined inside Sys.hh. One can change it to any number. It will be a configurable command line parameter in the later versions.

+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/pages/docs/_build/html/Argument ${WORKLOAD_CONFIG}.html b/pages/docs/_build/html/Argument ${WORKLOAD_CONFIG}.html new file mode 100644 index 0000000..e3e736e --- /dev/null +++ b/pages/docs/_build/html/Argument ${WORKLOAD_CONFIG}.html @@ -0,0 +1,182 @@ + + + + + + + Argument ${WORKLOAD_CONFIG} — ASTRA-sim 1.0.0 documentation + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Argument ${WORKLOAD_CONFIG}

+
--workload-configuration: path to the prefix of execution trace files
+
+
+

The naming rule for execution traces follows the format {path_prefix}.{npu_id}.eg.

+
+

Note

+

Execution traces can be created using Chakra tools. You have the option of using either execution trace generator (et_generator) or execution trace converter (et_converter).

+
+
+

Using Execution Trace Generator (et_generator)

+

et_generator can be used to define and generate any execution traces, functioning as a test case generator. You can generate execution traces with the following commands:

+
$ cd ${ASTRA_SIM}/extern/graph_frontend/chakra/et_generator
+$ cmake CMakeLists.txt && make -j$(nproc)
+$ ./et_generator --num_npus 64 --num_dims 1
+
+
+

To run one of the example traces (twoCompNodesDependent), execute the following command:

+
$ cd -
+$ ./build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra \
+  --workload-configuration=./extern/graph_frontend/chakra/et_generator/twoCompNodesDependent \
+  --system-configuration=./inputs/system/sample_fully_connected_sys.txt \
+  --network-configuration=./inputs/network/analytical/fully_connected.json \
+  --memory-configuration=./inputs/memory/analytical/no_memory_expansion.json
+
+
+

Upon completion, ASTRA-sim will display the number of cycles it took to run the simulation.

+
sys[0] finished, 10 cycles
+sys[1] finished, 10 cycles
+...
+sys[62] finished, 10 cycles
+sys[63] finished, 10 cycles
+
+
+
+
+

Using Execution Trace Converter (et_converter)

+

et_converter is a trace schema conversion tool, supporting PyTorch and FlexFlow execution traces, as well as ASTRA-sim 1.0 input files. You can convert ASTRA-sim 1.0 text input files into Chakra traces with the following commands:

+
$ cd ${ASTRA_SIM}/extern/graph_frontend/chakra/
+$ python3 setup.py install --user
+$ python3 -m et_converter.et_converter \
+    --input_type Text \
+    --input_filename ../../../inputs/workload/ASTRA-sim-1.0/Resnet50_DataParallel.txt \
+    --output_filename ../../../inputs/workload/ASTRA-sim-2.0/Resnet50_DataParallel \
+    --num_npus 64 \
+    --num_dims 1 \
+    --num_passes 1
+
+
+

Run the following command:

+
$ cd -
+$ ./build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra \
+  --workload-configuration=./inputs/workload/ASTRA-sim-2.0/Resnet50_DataParallel \
+  --system-configuration=./inputs/system/sample_fully_connected_sys.txt \
+  --network-configuration=./inputs/network/analytical/fully_connected.json \
+  --memory-configuration=./inputs/memory/analytical/no_memory_expansion.json
+
+
+

Upon completion, ASTRA-sim will display the number of cycles it took to run the simulation.

+
sys[62] finished, 187442108 cycles
+sys[61] finished, 187442108 cycles
+...
+sys[0] finished, 187442108 cycles
+sys[63] finished, 187442108 cycles
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/pages/docs/_build/html/Getting Started.html b/pages/docs/_build/html/Getting Started.html index 8d3fca4..447617a 100644 --- a/pages/docs/_build/html/Getting Started.html +++ b/pages/docs/_build/html/Getting Started.html @@ -76,11 +76,13 @@

Getting StartedRunning ASTRA-sim
  • ASTRA-sim Output
  • diff --git a/pages/docs/_build/html/Running ASTRA-sim.html b/pages/docs/_build/html/Running ASTRA-sim.html index fe01d38..2062c5a 100644 --- a/pages/docs/_build/html/Running ASTRA-sim.html +++ b/pages/docs/_build/html/Running ASTRA-sim.html @@ -15,7 +15,7 @@ - + @@ -38,11 +38,9 @@
  • Getting Started
    • Installation
    • Running ASTRA-sim
    • ASTRA-sim Output
    • @@ -79,7 +77,7 @@

      Running ASTRA-sim

      Once ASTRA-sim is built, the executable ${BINARY} is located at:

      # For the analytical network backend
      -$ {ASTRA_SIM}/build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra
      +$ ${ASTRA_SIM}/build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra
       

      Conduct experiments by passing the required aruguments:

      @@ -90,167 +88,21 @@

      Running ASTRA-sim=${ASTRA_SIM}/inputs/memory/analytical/no_memory_expansion.json -
      -

      Note

      -

      Additional arguments may be required based on the type of network backend.

      -
      -
      -

      Argument ${WORKLOAD_CONFIG}

      -
      --workload-configuration: path to the prefix of execution trace files
      -
      -
      -

      The naming rule for execution traces follows the format {path_prefix}.{npu_id}.eg.

      -
      -

      Note

      -

      Execution traces can be created using Chakra tools. You have the option of using either execution trace generator (et_generator) or execution trace converter (et_converter).

      -
      -
      -
      -

      Argument ${SYSTEM_CONFIG}

      -
      --system-configuration: path to the system configuration file
      -
      -
      -

      Example system configurations can be found at:

      -
      ${ASTRA_SIM}/inputs/system/
      -
      -
      +
        -
      • scheduling-policy: (LIFO/FIFO)

        -

        The order we proritize collectives according based on their time of arrival. LIFO means that most recently created collectives have higher priority. While FIFO is the reverse.

        -
      • -
      • intra-dimension-scheduling: (FIFO/SCF)

        -

        The order we proritize collective chunks inside each dimension. FIFO means that the least recently created collectives have higher priority. SCF means that the smallest chunk have higher priority.

        -
      • -
      • inter-dimension-scheduling: (baseline/themis)

        -

        The order in which we prioritize collective chunks across multiple dimensions. baseline means that the scheduling always uses a constant schedule for all chunks. themis means that the scheduling issues chunks to the dimension with the least load.

        -
      • -
      • endpoint-delay: (int)

        -

        The time NPU spends processing a message after receiving it in terms of cycles.

        -
      • -
      • active-chunks-per-dimension: (int)

        -

        This corresponds to the maximum number of chunks we would like to execute in parallel on -each logical dimension of the topology.

        -
      • -
      • preferred-dataset-splits: (int)

        -

        The number of chunks we divide each collective into.

        -
      • -
      • all-reduce-implementation: (Dimension0Collective_Dimension1Collective_xxx_DimensionNCollective)

        -

        Here we can create a multiphase collective all-reduce algorithm and directly specify the collective algorithm type for each logical dimension. The available options (algorithms) are: ring, direct, doubleBinaryTree, oneRing, oneDirect.

        -

        For example, “ring_doubleBinaryTree” means we create a logical topology with 2 dimensions and we perform the ring algorithm on the first dimension followed by double binary tree on the second dimension for the all-reduce pattern. Hence the number of physical dimensions should be equal to the number of logical dimensions. The only exceptions are oneRing/oneDirect, where we assume that no matter how many physical dimensions we have, we create one big logical ring/direct (AllToAll) topology where all NPUs are connected and perform a one-phase ring/direct algorithm.

        -
      • +
      • Argument ${WORKLOAD_CONFIG} -
        -

        Note

        -

        oneRing and oneDirect are not available for the Garnet backend in this version.

        -
        -
          -
        • reduce-scatter-implementation: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg)

          -

          The same as “all-reduce-implementation:” but for reduce-scatter collective. The available options are: ring, direct, oneRing, oneDirect.

          -
        • -
        • all-gather-implementation: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg)

          -

          The same as “all-reduce-implementation:” but for all-gather collective. The available options (algorithms) are: ring, direct, oneRing, oneDirect.

          -
        • -
        • all-to-all-implementation: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg)

          -

          The same as “all-reduce-implementation:” but for all-to-all collective. The available options (algorithms) are: ring, direct, oneRing, oneDirect.

          -
        • -
        • collective-optimization: (baseline/localBWAware)

          -

          baseline issues allreduce across all dimensions to handle allreduce of single chunk. While for an N-dimensional network, localBWAware issues a series of reduce-scatters on all dimensions from dim1 to dimN-1, followed by all-reduce on dimN, and then series of all-gathers starting from dimN-1 to dim1. This optimization is used to reduce the chunk size as it goes to the next network dimensions.

        • +
        • Argument ${SYSTEM_CONFIG}
        • +
        • Argument ${NETWORK_CONFIG}
        -
        -

        Note

        -

        The default clock cycle period is 1 ns (1 GHz frequency). This value is defined inside Sys.hh. One can change it to any number. It will be a configurable command line parameter in later versions.

        -
      -
      -

      Argument ${NETWORK_CONFIG}

      -
      --network-configuration: path to the network configuration file
      -
      -
      -

      Example network configurations can be found at

      -
      ${ASTRA_SIM}/inputs/network/
      -
      -
      -
        -
      • topology-name: (string) put “Hierarchical”

      • -
      • dimensions-count: (uint) number of network dimensions

      • -

      Note

      -

      Each configuration below is represented as an array of size dimensions-count, indexed by the dimension level.

      -
      -
        -
      • topologies-per-dim: (string) network topology (“Ring”, “FullyConnected”, or “Switch”)

      • -
      • dimension-type: (string) dimension type (“Tile”, “Package”, “Node”, or “Pod”)

      • -
      • units-count: (uint) number of GPUs

      • -
      • links-count: (uint) number of links

      • -
      • link-latency: (uint) link latency (ns)

      • -
      • link-bandwidth: (uint) link bandwidth (GB/s or B/ns)

      • -
      • nic-latency: (uint) nic latency (ns)

      • -
      • router-latency: (uint) router latency (ns)

      • -
      • hbm-latency: (uint) memory latency (ns)

      • -
      • hbm-bandwidth: (uint) memory bandwidth (GB/s or B/ns)

      • -
      • hbm-scale: (uint) memory scaling factor

      • -
      -
      -
      -

      Using Execution Trace Generator (et_generator)

      -

      et_generator can be used to define and generate any execution traces, functioning as a test case generator. You can generate execution traces with the following commands:

      -
      $ cd {ASTRA_SIM}/extern/graph_frontend/chakra/et_generator
      -$ cmake CMakeLists.txt && make -j$(nproc)
      -$ ./et_generator --num_npus 64 --num_dims 1
      -
      -
      -

      To run one of the example traces (twoCompNodesDependent), execute the following command:

      -
      $ cd -
      -$ ./build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra \
      -  --workload-configuration=./extern/graph_frontend/chakra/et_generator/twoCompNodesDependent \
      -  --system-configuration=./inputs/system/sample_fully_connected_sys.txt \
      -  --network-configuration=./inputs/network/analytical/fully_connected.json \
      -  --memory-configuration=./inputs/memory/analytical/no_memory_expansion.json
      -
      -
      -

      Upon completion, ASTRA-sim will display the number of cycles it took to run the simulation.

      -
      sys[0] finished, 10 cycles
      -sys[1] finished, 10 cycles
      -...
      -sys[62] finished, 10 cycles
      -sys[63] finished, 10 cycles
      -
      -
      -
      -
      -

      Using Execution Trace Converter (et_converter)

      -

      et_converter is a trace schema conversion tool, supporting PyTorch and FlexFlow execution traces, as well as ASTRA-sim 1.0 input files. You can convert ASTRA-sim 1.0 text input files into Chakra traces with the following commands:

      -
      $ cd {ASTRA_SIM}/extern/graph_frontend/chakra/
      -$ python3 setup.py install --user
      -$ python3 -m et_converter.et_converter \
      -    --input_type Text \
      -    --input_filename ../../../inputs/workload/ASTRA-sim-1.0/Resnet50_DataParallel.txt \
      -    --output_filename ../../../inputs/workload/ASTRA-sim-2.0/Resnet50_DataParallel \
      -    --num_npus 64 \
      -    --num_dims 1 \
      -    --num_passes 1
      -
      -
      -

      Run the following command:

      -
      $ cd -
      -$ ./build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra \
      -  --workload-configuration=./inputs/workload/ASTRA-sim-2.0/Resnet50_DataParallel \
      -  --system-configuration=./inputs/system/sample_fully_connected_sys.txt \
      -  --network-configuration=./inputs/network/analytical/fully_connected.json \
      -  --memory-configuration=./inputs/memory/analytical/no_memory_expansion.json
      -
      -
      -

      Upon completion, ASTRA-sim will display the number of cycles it took to run the simulation.

      -
      sys[62] finished, 187442108 cycles
      -sys[61] finished, 187442108 cycles
      -...
      -sys[0] finished, 187442108 cycles
      -sys[63] finished, 187442108 cycles
      -
      +

      Additional arguments may be required based on the type of network backend.

      -
      @@ -258,7 +110,7 @@

      Using Execution Trace Converter (et_converter) - +
      diff --git a/pages/docs/_build/html/_sources/Argument ${NETWORK_CONFIG}.rst.txt b/pages/docs/_build/html/_sources/Argument ${NETWORK_CONFIG}.rst.txt new file mode 100644 index 0000000..ca2c40a --- /dev/null +++ b/pages/docs/_build/html/_sources/Argument ${NETWORK_CONFIG}.rst.txt @@ -0,0 +1,41 @@ +Argument ${NETWORK_CONFIG} +========================== + +.. code-block:: console + + --network-configuration: path to the network configuration file + +Example network configurations can be found at + +.. code-block:: console + + ${ASTRA_SIM}/inputs/network/ + +* **topology-name**: (string) put "Hierarchical" + +* **dimensions-count**: (uint) number of network dimensions + +.. note:: + Each configurations below is represented as an array of size **dimensions-count**, indexed by the dimension level. + +* **topologies-per-dim**: (string) network topology ("Ring", "FullyConnected", or "Switch") + +* **dimension-type**: (string) dimension type ("Tile", "Package", "Node", or "Pod") + +* **units-count**: (uint) number of GPUs + +* **links-count**: (uint) number of links + +* **link-latency**: (uint) link latency (ns) + +* **link-bandwidth**: (uint) link bandwidth (GB/s or B/ns) + +* **nic-latency**: (uint) nic latency (ns) + +* **router-latency**: (uint) router latency (ns) + +* **hbm-latency**: (uint) memory latency (ns) + +* **hbm-bandwidth**: (uint) memory bandwidth (GB/s or B/ns) + +* **hbm-scale**: (uint) memory scaling factor diff --git a/pages/docs/_build/html/_sources/Argument ${SYSTEM_CONFIG}.rst.txt b/pages/docs/_build/html/_sources/Argument ${SYSTEM_CONFIG}.rst.txt new file mode 100644 index 0000000..0ed5474 --- /dev/null +++ b/pages/docs/_build/html/_sources/Argument ${SYSTEM_CONFIG}.rst.txt @@ -0,0 +1,66 @@ +Argument ${SYSTEM_CONFIG} +========================= + +.. code-block:: console + + --system-configuration: path to the system configuration file + +Example system configurations can be found at: + +.. 
code-block:: console + + ${ASTRA_SIM}/inputs/system/ + +* **scheduling-policy**: (LIFO/FIFO) + + The order we proritize collectives according based on their time of arrival. LIFO means that most recently created collectives have higher priority. While FIFO is the reverse. + +* **intra-dimension-scheduling**: (FIFO/SCF) + + The order we proritize collective chunks inside each dimension. FIFO means that the least recently created collectives have higher priority. SCF means that the smallest chunk have higher priority. + +* **inter-dimension-scheduling**: (baseline/themis) + + The order we proritize collective chunks across multiple dimensions. baseline means that the scheduling always uses a constant schedule for all chunks. themis means that the scheduling issues chunks to the dimension with least load. + +* **endpoint-delay**: (int) + + The time NPU spends processing a message after receiving it in terms of cycles. + +* **active-chunks-per-dimension**: (int) + + This corresponds to the Maximum number of chunks we like execute in parallel on + each logical dimesnion of topology. + +* **preferred-dataset-splits**: (int) + + The number of chunks we divide each collective into. + +* **all-reduce-implementation**: (Dimension0Collective_Dimension1Collective_xxx_DimensionNCollective) + + Here we can create a multiphase colective all-reduce algorithm and directly specify the collective algorithm type for each logical dimension. The available options (algorithms) are: ring, direct, doubleBinaryTree, oneRing, oneDirect. + + For example, "ring_doubleBinaryTree" means we create a logical topology with 2 dimensions and we perform ring algorithm on the first dimension followed by double binary tree on the second dimension for the all-reduce pattern. Hence the number of physical dimension should be equal to the number of logical dimensions. 
The only exceptions are oneRing/oneDirect where we assume no matter how many physical dimensions we have, we create a one big logical ring/direct(AllToAll) topology where all NPUs are connected and perfrom a one phase ring/direct algorithm. + +.. note:: + + oneRing and oneDirect is not available for Garnet Backend in this version. + +* **reduce-scatter-implementation**: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg) + + The same as "all-reduce-implementation:" but for reduce-scatter collective. The available options are: ring, direct, oneRing, oneDirect. + +* **all-gather-implementation**: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg) + + The same as "all-reduce-implementation:" but for all-gather collective. The available options (algorithms) are: ring, direct, oneRing, oneDirect. + +* **all-to-all-implementation**: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg) + + The same as "all-reduce-implementation:" but for all-to-all collective. The available options (algorithms) are: ring, direct, oneRing, oneDirect. + +* **collective-optimization**: (baseline/localBWAware) + + baseline issues allreduce across all dimensions to handle allreduce of single chunk. While for an N-dimensional network, localBWAware issues a series of reduce-scatters on all dimensions from dim1 to dimN-1, followed by all-reduce on dimN, and then series of all-gathers starting from dimN-1 to dim1. This optimization is used to reduce the chunk size as it goes to the next network dimensions. + +.. note:: + The default clock cycle period is 1ns (1 Ghz feq). This value is defined inside Sys.hh. One can change it to any number. It will be a configurable command line parameter in the later versions. 
diff --git a/pages/docs/_build/html/_sources/Argument ${WORKLOAD_CONFIG}.rst.txt b/pages/docs/_build/html/_sources/Argument ${WORKLOAD_CONFIG}.rst.txt new file mode 100644 index 0000000..37b88fc --- /dev/null +++ b/pages/docs/_build/html/_sources/Argument ${WORKLOAD_CONFIG}.rst.txt @@ -0,0 +1,85 @@ +Argument ${WORKLOAD_CONFIG} +=========================== + +.. code-block:: console + + --workload-configuration: path to the prefix of execution trace files + +The naming rule for execution traces follows the format {path_prefix}.{npu_id}.eg. + +.. note:: + Execution traces can be created using Chakra tools. You have the option of using either execution trace generator (:ref:`et_generator `) or execution trace converter (:ref:`et_converter `). + +.. _et_generator: + +Using Execution Trace Generator (et_generator) +---------------------------------------------- + +et_generator can be used to define and generate any execution traces, functioning as a test case generator. You can generate execution traces with the following commands: + +.. code-block:: console + + $ cd ${ASTRA_SIM}/extern/graph_frontend/chakra/et_generator + $ cmake CMakeLists.txt && make -j$(nproc) + $ ./et_generator --num_npus 64 --num_dims 1 + +To run one of the example traces (twoCompNodesDependent), execute the following command: + +.. code-block:: console + + $ cd - + $ ./build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra \ + --workload-configuration=./extern/graph_frontend/chakra/et_generator/twoCompNodesDependent \ + --system-configuration=./inputs/system/sample_fully_connected_sys.txt \ + --network-configuration=./inputs/network/analytical/fully_connected.json \ + --memory-configuration=./inputs/memory/analytical/no_memory_expansion.json + +Upon completion, ASTRA-sim will display the number of cycles it took to run the simulation. + +.. code-block:: console + + sys[0] finished, 10 cycles + sys[1] finished, 10 cycles + ... 
+ sys[62] finished, 10 cycles + sys[63] finished, 10 cycles + +.. _et_converter: + +Using Execution Trace Converter (et_converter) +---------------------------------------------- + +et_converter is a trace schema conversion tool, supporting PyTorch and FlexFlow execution traces, as well as ASTRA-sim 1.0 input files. You can convert ASTRA-sim 1.0 text input files into Chakra traces with the following commands: + +.. code-block:: console + + $ cd ${ASTRA_SIM}/extern/graph_frontend/chakra/ + $ python3 setup.py install --user + $ python3 -m et_converter.et_converter \ + --input_type Text \ + --input_filename ../../../inputs/workload/ASTRA-sim-1.0/Resnet50_DataParallel.txt \ + --output_filename ../../../inputs/workload/ASTRA-sim-2.0/Resnet50_DataParallel \ + --num_npus 64 \ + --num_dims 1 \ + --num_passes 1 + +Run the following command: + +.. code-block:: console + + $ cd - + $ ./build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra \ + --workload-configuration=./inputs/workload/ASTRA-sim-2.0/Resnet50_DataParallel \ + --system-configuration=./inputs/system/sample_fully_connected_sys.txt \ + --network-configuration=./inputs/network/analytical/fully_connected.json \ + --memory-configuration=./inputs/memory/analytical/no_memory_expansion.json + +Upon completion, ASTRA-sim will display the number of cycles it took to run the simulation. + +.. code-block:: console + + sys[62] finished, 187442108 cycles + sys[61] finished, 187442108 cycles + ... + sys[0] finished, 187442108 cycles + sys[63] finished, 187442108 cycles diff --git a/pages/docs/_build/html/_sources/Running ASTRA-sim.rst.txt b/pages/docs/_build/html/_sources/Running ASTRA-sim.rst.txt index 45aadb1..b1d2748 100644 --- a/pages/docs/_build/html/_sources/Running ASTRA-sim.rst.txt +++ b/pages/docs/_build/html/_sources/Running ASTRA-sim.rst.txt @@ -6,7 +6,7 @@ Once ASTRA-sim is built, the executable ${BINARY} is located at: .. 
code-block:: console # For the analytical network backend - $ {ASTRA_SIM}/build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra + $ ${ASTRA_SIM}/build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra Conduct experiments by passing the required aruguments: @@ -18,198 +18,11 @@ Conduct experiments by passing the required aruguments: --network-configuration=${NETWORK_CONFIG} \ --memory-configuration=${ASTRA_SIM}/inputs/memory/analytical/no_memory_expansion.json -.. note:: - Additional arguments may be required based on the type of network backend. - -Argument ${WORKLOAD_CONFIG} ---------------------------- - -.. code-block:: console +.. toctree:: - --workload-configuration: path to the prefix of execution trace files - -The naming rule for execution traces follows the format {path_prefix}.{npu_id}.eg. + Argument ${WORKLOAD_CONFIG} + Argument ${SYSTEM_CONFIG} + Argument ${NETWORK_CONFIG} .. note:: - Execution traces can be created using Chakra tools. You have the option of using either execution trace generator (et_generator) or execution trace converter (et_converter). - -Argument ${SYSTEM_CONFIG} -------------------------- - -.. code-block:: console - - --system-configuration: path to the system configuration file - -Example system configurations can be found at: - -.. code-block:: console - - ${ASTRA_SIM}/inputs/system/ - -* **scheduling-policy**: (LIFO/FIFO) - - The order we proritize collectives according based on their time of arrival. LIFO means that most recently created collectives have higher priority. While FIFO is the reverse. - -* **intra-dimension-scheduling**: (FIFO/SCF) - - The order we proritize collective chunks inside each dimension. FIFO means that the least recently created collectives have higher priority. SCF means that the smallest chunk have higher priority. - -* **inter-dimension-scheduling**: (baseline/themis) - - The order we proritize collective chunks across multiple dimensions. 
baseline means that the scheduling always uses a constant schedule for all chunks. themis means that the scheduling issues chunks to the dimension with least load. - -* **endpoint-delay**: (int) - - The time NPU spends processing a message after receiving it in terms of cycles. - -* **active-chunks-per-dimension**: (int) - - This corresponds to the Maximum number of chunks we like execute in parallel on - each logical dimesnion of topology. - -* **preferred-dataset-splits**: (int) - - The number of chunks we divide each collective into. - -* **all-reduce-implementation**: (Dimension0Collective_Dimension1Collective_xxx_DimensionNCollective) - - Here we can create a multiphase colective all-reduce algorithm and directly specify the collective algorithm type for each logical dimension. The available options (algorithms) are: ring, direct, doubleBinaryTree, oneRing, oneDirect. - - For example, "ring_doubleBinaryTree" means we create a logical topology with 2 dimensions and we perform ring algorithm on the first dimension followed by double binary tree on the second dimension for the all-reduce pattern. Hence the number of physical dimension should be equal to the number of logical dimensions. The only exceptions are oneRing/oneDirect where we assume no matter how many physical dimensions we have, we create a one big logical ring/direct(AllToAll) topology where all NPUs are connected and perfrom a one phase ring/direct algorithm. - -.. note:: - - oneRing and oneDirect is not available for Garnet Backend in this version. - -* **reduce-scatter-implementation**: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg) - - The same as "all-reduce-implementation:" but for reduce-scatter collective. The available options are: ring, direct, oneRing, oneDirect. - -* **all-gather-implementation**: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg) - - The same as "all-reduce-implementation:" but for all-gather collective. 
The available options (algorithms) are: ring, direct, oneRing, oneDirect. - -* **all-to-all-implementation**: (Dimension0CollectiveAlg_Dimension1CollectiveAlg_xxx_DimensionNCollectiveAlg) - - The same as "all-reduce-implementation:" but for all-to-all collective. The available options (algorithms) are: ring, direct, oneRing, oneDirect. - -* **collective-optimization**: (baseline/localBWAware) - - baseline issues allreduce across all dimensions to handle allreduce of single chunk. While for an N-dimensional network, localBWAware issues a series of reduce-scatters on all dimensions from dim1 to dimN-1, followed by all-reduce on dimN, and then series of all-gathers starting from dimN-1 to dim1. This optimization is used to reduce the chunk size as it goes to the next network dimensions. - -.. note:: - The default clock cycle period is 1ns (1 Ghz feq). This value is defined inside Sys.hh. One can change it to any number. It will be a configurable command line parameter in the later versions. - -Argument ${NETWORK_CONFIG} --------------------------- - -.. code-block:: console - - --network-configuration: path to the network configuration file - -Example network configurations can be found at - -.. code-block:: console - - ${ASTRA_SIM}/inputs/network/ - -* **topology-name**: (string) put "Hierarchical" - -* **dimensions-count**: (uint) number of network dimensions - -.. note:: - Each configurations below is represented as an array of size **dimensions-count**, indexed by the dimension level. 
- -* **topologies-per-dim**: (string) network topology ("Ring", "FullyConnected", or "Switch") - -* **dimension-type**: (string) dimension type ("Tile", "Package", "Node", or "Pod") - -* **units-count**: (uint) number of GPUs - -* **links-count**: (uint) number of links - -* **link-latency**: (uint) link latency (ns) - -* **link-bandwidth**: (uint) link bandwidth (GB/s or B/ns) - -* **nic-latency**: (uint) nic latency (ns) - -* **router-latency**: (uint) router latency (ns) - -* **hbm-latency**: (uint) memory latency (ns) - -* **hbm-bandwidth**: (uint) memory bandwidth (GB/s or B/ns) - -* **hbm-scale**: (uint) memory scaling factor - -Using Execution Trace Generator (et_generator) ----------------------------------------------- - -et_generator can be used to define and generate any execution traces, functioning as a test case generator. You can generate execution traces with the following commands: - -.. code-block:: console - - $ cd {ASTRA_SIM}/extern/graph_frontend/chakra/et_generator - $ cmake CMakeLists.txt && make -j$(nproc) - $ ./et_generator --num_npus 64 --num_dims 1 - -To run one of the example traces (twoCompNodesDependent), execute the following command: - -.. code-block:: console - - $ cd - - $ ./build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra \ - --workload-configuration=./extern/graph_frontend/chakra/et_generator/twoCompNodesDependent \ - --system-configuration=./inputs/system/sample_fully_connected_sys.txt \ - --network-configuration=./inputs/network/analytical/fully_connected.json \ - --memory-configuration=./inputs/memory/analytical/no_memory_expansion.json - -Upon completion, ASTRA-sim will display the number of cycles it took to run the simulation. - -.. code-block:: console - - sys[0] finished, 10 cycles - sys[1] finished, 10 cycles - ... 
- sys[62] finished, 10 cycles - sys[63] finished, 10 cycles - -Using Execution Trace Converter (et_converter) ----------------------------------------------- - -et_converter is a trace schema conversion tool, supporting PyTorch and FlexFlow execution traces, as well as ASTRA-sim 1.0 input files. You can convert ASTRA-sim 1.0 text input files into Chakra traces with the following commands: - -.. code-block:: console - - $ cd {ASTRA_SIM}/extern/graph_frontend/chakra/ - $ python3 setup.py install --user - $ python3 -m et_converter.et_converter \ - --input_type Text \ - --input_filename ../../../inputs/workload/ASTRA-sim-1.0/Resnet50_DataParallel.txt \ - --output_filename ../../../inputs/workload/ASTRA-sim-2.0/Resnet50_DataParallel \ - --num_npus 64 \ - --num_dims 1 \ - --num_passes 1 - -Run the following command: - -.. code-block:: console - - $ cd - - $ ./build/astra_analytical/build/AnalyticalAstra/bin/AnalyticalAstra \ - --workload-configuration=./inputs/workload/ASTRA-sim-2.0/Resnet50_DataParallel \ - --system-configuration=./inputs/system/sample_fully_connected_sys.txt \ - --network-configuration=./inputs/network/analytical/fully_connected.json \ - --memory-configuration=./inputs/memory/analytical/no_memory_expansion.json - -Upon completion, ASTRA-sim will display the number of cycles it took to run the simulation. - -.. code-block:: console - - sys[62] finished, 187442108 cycles - sys[61] finished, 187442108 cycles - ... - sys[0] finished, 187442108 cycles - sys[63] finished, 187442108 cycles - - + Additional arguments may be required based on the type of network backend. diff --git a/pages/docs/_build/html/index.html b/pages/docs/_build/html/index.html index 7a5b510..eb51b70 100644 --- a/pages/docs/_build/html/index.html +++ b/pages/docs/_build/html/index.html @@ -76,11 +76,13 @@

      Welcome to ASTRA-sim’s documentation!Running ASTRA-sim
    • ASTRA-sim Output
    • diff --git a/pages/docs/_build/html/objects.inv b/pages/docs/_build/html/objects.inv index a6bb2653658e08d1476ea28f7169b3221b80e131..5ca3f60d6b0c69180082052b8b7c1c0cb8393a33 100644 GIT binary patch delta 378 zcmV-=0fqkQ0_X#fcYmdl-AcnS6vyxT6p>kQ7Zn}Cz`MH5F?1iSmEqNh>7gsONoh}I zLj+&L*YinC+q6x!;KaLd{vW^Ow6^1}Z7U5Tvdl2fkfi8cdx8<`lGv4I4lYXde1}m; zEW=LxECPnabwD!sSm`1803Jn0Md5;$KTLX8LO39siLtqG&J@vms%ZgC->R9 YTM9H`yLPagl^LhhR}yP{0j6eV-4=+uVE_OC delta 250 zcmVgcYlSE&1%Ci5QOh~iY=i>l{UxT426c>PYNN#Zv{15JUzlX_N80)K+0c=zkaq%%WXmqv)YQ1E)!P@!$bwLeeGd5i9!gKW)9fgNWT#HZCGf>o& z+RUHq7Ry(w#16G|27%md!1?>+Ax48iOP8&8%sBN&+jQi`Nax<13L10oB;=Rq5fn}s z1uIANOa9k;tTMwFOq&nAR3fXo{~6UDX^8(}vCX5{%V7y7Eiqm4+gQtf0OXU$1!6mW A`2YX_ diff --git a/pages/docs/_build/html/searchindex.js b/pages/docs/_build/html/searchindex.js index e60894d..96316b7 100644 --- a/pages/docs/_build/html/searchindex.js +++ b/pages/docs/_build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["ASTRA-sim Output","Getting Started","Installation","Running ASTRA-sim","index"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.intersphinx":1,sphinx:56},filenames:["ASTRA-sim Output.rst","Getting Started.rst","Installation.rst","Running 
ASTRA-sim.rst","index.rst"],objects:{},objnames:{},objtypes:{},terms:{"0":[0,3],"1":[0,2,3],"10":3,"1300":0,"187442108":3,"1n":3,"1total":0,"2":[0,3],"25":0,"3":[0,2],"4":0,"400":0,"6":2,"61":3,"62":3,"63":3,"64":3,"646570":0,"case":3,"default":3,"function":3,"int":3,"switch":[0,3],"while":3,For:[2,3],It:3,One:3,The:[0,3],To:[2,3],accord:3,across:3,activ:[3,4],ad:0,addit:3,after:[2,3],algorithm:3,all:[2,3],allreduc:3,alltoal:3,alwai:3,an:3,analyt:[2,3],analyticalastra:3,ani:3,apt:2,ar:3,argument:[1,4],arrai:3,arriv:3,arugu:3,assum:3,astra:[1,2],astra_analyt:[2,3],astra_sim:3,avail:3,b:3,backend:[2,3],bandwidth:[0,3],base:[2,3],baselin:3,below:[0,3],big:3,bin:3,binari:3,build:[2,3],built:3,c:2,can:3,cd:[2,3],chakra:3,chang:3,chunk:3,clock:3,clone:2,cmake:[2,3],cmakelist:3,colect:3,collect:3,com:2,command:3,compil:2,complet:3,conduct:3,configur:3,connect:3,constant:3,convers:3,convert:[1,4],correspond:3,cost:0,costmodel:0,count:[0,3],creat:3,cycl:[0,3],dataset:3,defin:3,delai:3,depend:[1,4],dev:2,develop:4,dim1:3,dim:3,dimens:[0,3],dimension0collective_dimension1collective_xxx_dimensionncollect:3,dimension0collectivealg_dimension1collectivealg_xxx_dimensionncollectivealg:3,dimension:3,dimesnion:3,dimn:3,direct:3,directli:3,directori:2,displai:3,divid:3,doubl:3,doublebinarytre:3,download:2,each:3,eg:3,either:3,endpoint:3,equal:3,et_convert:[1,4],et_gener:[1,4],exampl:3,except:3,execut:[1,2,4],experi:3,extern:3,factor:3,feq:3,fifo:3,file:3,finish:[0,3],first:3,flexflow:3,follow:3,format:3,found:3,from:[2,3],fully_connect:3,fullyconnect:3,g:2,garnet:3,gather:3,gb:3,gcc:2,gener:[1,4],get:[2,4],ghz:3,git:2,github:[2,4],go:2,goe:3,gpu:3,graph_frontend:3,handl:3,have:[2,3],hbm:3,henc:3,here:3,hh:3,hierarch:3,higher:3,how:3,id:0,implement:3,index:[0,3],input:3,input_filenam:3,input_typ:3,insid:3,instal:[1,3,4],inter:[0,3],intra:3,issu:3,j:3,json:3,latenc:3,later:3,least:3,level:3,libboost:2,libprotobuf:2,lifo:3,like:3,line:3,link:[0,3],load:3,local:0,localbwawar:3,locat:3,log
ic:3,m:3,mai:3,make:[2,3],mani:3,matter:3,maximum:3,mean:3,memori:3,messag:3,most:3,multiphas:3,multipl:3,n:3,name:3,network:[2,3],network_config:[1,4],next:3,nic:[0,3],no_memory_expans:3,node:[0,3],nproc:3,npu:3,npu_id:3,ns:3,num_dim:3,num_npu:3,num_pass:3,number:3,offset:0,onc:3,one:3,onedirect:3,oner:3,onli:3,optim:3,option:[2,3],order:3,output:[1,4],output_filenam:3,packag:[1,3,4],page:4,parallel:3,paramet:3,pass:3,path:3,path_prefix:3,pattern:3,per:3,perform:3,perfrom:3,period:3,phase:3,physic:3,pip3:2,pip:2,pleas:2,pod:3,polici:3,prefer:3,prefix:3,print:0,prioriti:3,process:3,program:2,project:4,prorit:3,protobuf:2,put:3,py:3,pydot:2,python3:[2,3],pytorch:3,radix:0,receiv:3,recent:3,recurs:2,reduc:3,repres:3,requir:3,resnet50_dataparallel:3,revers:3,ring:[0,3],ring_doublebinarytre:3,router:3,rule:3,run:[1,4],s:3,same:3,sample_fully_connected_si:3,scale:3,scatter:3,scf:3,schedul:3,schema:3,script:2,search:4,second:3,see:2,seri:3,setup:3,sh:2,should:3,shown:0,sim:[1,2],simul:3,singl:3,size:3,smallest:3,specifi:3,spend:3,split:3,start:[3,4],string:3,submodul:2,sudo:2,support:3,sure:2,sy:[0,3],system:3,system_config:[1,4],target:2,term:3,termin:0,test:3,text:3,themi:3,thi:[3,4],tile:3,time:3,took:3,tool:3,topolog:3,total:0,trace:[1,4],tree:3,twocompnodesdepend:3,txt:3,type:3,uint:3,under:4,unit:3,updat:2,upon:3,us:[1,4],user:3,valu:3,version:3,we:3,well:3,where:3,workload:3,workload_config:[1,4],y:2,you:[2,3],your:2},titles:["ASTRA-sim Output","Getting Started","Installation","Running ASTRA-sim","Welcome to ASTRA-sim\u2019s documentation!"],titleterms:{argument:3,astra:[0,3,4],convert:3,depend:2,document:4,et_convert:3,et_gener:3,execut:3,gener:3,get:1,indic:4,instal:2,network_config:3,output:0,packag:2,run:3,s:4,sim:[0,3,4],start:1,system_config:3,tabl:4,trace:3,us:3,welcom:4,workload_config:3}}) \ No newline at end of file +Search.setIndex({docnames:["ASTRA-sim Output","Argument ${NETWORK_CONFIG}","Argument ${SYSTEM_CONFIG}","Argument 
${WORKLOAD_CONFIG}","Getting Started","Installation","Running ASTRA-sim","index"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.intersphinx":1,sphinx:56},filenames:["ASTRA-sim Output.rst","Argument ${NETWORK_CONFIG}.rst","Argument ${SYSTEM_CONFIG}.rst","Argument ${WORKLOAD_CONFIG}.rst","Getting Started.rst","Installation.rst","Running ASTRA-sim.rst","index.rst"],objects:{},objnames:{},objtypes:{},terms:{"0":[0,3],"1":[0,2,3,5],"10":3,"1300":0,"187442108":3,"1n":2,"1total":0,"2":[0,2,3],"25":0,"3":[0,5],"4":0,"400":0,"6":5,"61":3,"62":3,"63":3,"64":3,"646570":0,"case":3,"default":2,"function":3,"int":2,"switch":[0,1],"while":2,For:[2,5,6],It:2,One:2,The:[0,2,3],To:[3,5],accord:2,across:2,activ:[2,7],ad:0,addit:6,after:[2,5],algorithm:2,all:[2,5],allreduc:2,alltoal:2,alwai:2,an:[1,2],analyt:[3,5,6],analyticalastra:[3,6],ani:[2,3],apt:5,ar:2,argument:[4,6,7],arrai:1,arriv:2,arugu:6,assum:2,astra:[3,4,5],astra_analyt:[3,5,6],astra_sim:[1,2,3,6],avail:2,b:1,backend:[2,5,6],bandwidth:[0,1],base:[2,5,6],baselin:2,below:[0,1],big:2,bin:[3,6],binari:[2,6],build:[3,5,6],built:6,c:5,can:[1,2,3],cd:[3,5],chakra:3,chang:2,chunk:2,clock:2,clone:5,cmake:[3,5],cmakelist:3,colect:2,collect:2,com:5,command:[2,3],compil:5,complet:3,conduct:6,configur:[1,2,3,6],connect:2,constant:2,convers:3,convert:[4,6,7],correspond:2,cost:0,costmodel:0,count:[0,1],creat:[2,3],cycl:[0,2,3],dataset:2,defin:[2,3],delai:2,depend:[4,7],dev:5,develop:7,dim1:2,dim:1,dimens:[0,1,2],dimension0collective_dimension1collective_xxx_dimensionncollect:2,dimension0collectivealg_dimension1collectivealg_xxx_dimensionncollectivealg:2,dimension:2,dimesnion:2,dimn:2,direct:2,directli:2,directori:5,displai:3,divid:2,doubl:2,doublebinarytre:2,download:5,each:[1,2],eg:3,either:3,endpoint:2,equal:2
,et_convert:[4,6,7],et_gener:[4,6,7],exampl:[1,2,3],except:2,execut:[2,4,5,6,7],experi:6,extern:3,factor:1,feq:2,fifo:2,file:[1,2,3],finish:[0,3],first:2,flexflow:3,follow:[2,3],format:3,found:[1,2],from:[2,5],fully_connect:3,fullyconnect:1,g:5,garnet:2,gather:2,gb:1,gcc:5,gener:[4,6,7],get:[5,7],ghz:2,git:5,github:[5,7],go:5,goe:2,gpu:1,graph_frontend:3,handl:2,have:[2,3,5],hbm:1,henc:2,here:2,hh:2,hierarch:1,higher:2,how:2,id:0,implement:2,index:[0,1],input:[1,2,3,6],input_filenam:3,input_typ:3,insid:2,instal:[3,4,7],inter:[0,2],intra:2,issu:2,j:3,json:[3,6],latenc:1,later:2,least:2,level:1,libboost:5,libprotobuf:5,lifo:2,like:2,line:2,link:[0,1],load:2,local:0,localbwawar:2,locat:6,logic:2,m:3,mai:6,make:[3,5],mani:2,matter:2,maximum:2,mean:2,memori:[1,3,6],messag:2,most:2,multiphas:2,multipl:2,n:2,name:[1,3],network:[1,2,3,5,6],network_config:[4,6,7],next:2,nic:[0,1],no_memory_expans:[3,6],node:[0,1],nproc:3,npu:2,npu_id:3,ns:1,num_dim:3,num_npu:3,num_pass:3,number:[1,2,3],offset:0,onc:6,one:[2,3],onedirect:2,oner:2,onli:2,optim:2,option:[2,3,5],order:2,output:[4,7],output_filenam:3,packag:[1,4,7],page:7,parallel:2,paramet:2,pass:6,path:[1,2,3],path_prefix:3,pattern:2,per:[1,2],perform:2,perfrom:2,period:2,phase:2,physic:2,pip3:5,pip:5,pleas:5,pod:1,polici:2,prefer:2,prefix:3,print:0,prioriti:2,process:2,program:5,project:7,prorit:2,protobuf:5,put:1,py:3,pydot:5,python3:[3,5],pytorch:3,radix:0,receiv:2,recent:2,recurs:5,reduc:2,repres:1,requir:6,resnet50_dataparallel:3,revers:2,ring:[0,1,2],ring_doublebinarytre:2,router:1,rule:3,run:[3,4,7],s:1,same:2,sample_fully_connected_si:3,scale:1,scatter:2,scf:2,schedul:2,schema:3,script:5,search:7,second:2,see:5,seri:2,setup:3,sh:5,should:2,shown:0,sim:[3,4,5],simul:3,singl:2,size:[1,2],smallest:2,specifi:2,spend:2,split:2,start:[2,7],string:1,submodul:5,sudo:5,support:3,sure:5,sy:[0,2,3],system:[2,3,6],system_config:[4,6,7],target:5,term:2,termin:0,test:3,text:3,themi:2,thi:[2,7],tile:1,time:2,took:3,tool:3,topolog:[1,2
],total:0,trace:[4,6,7],tree:2,twocompnodesdepend:3,txt:3,type:[1,2,6],uint:1,under:7,unit:1,updat:5,upon:3,us:[2,4,6,7],user:3,valu:2,version:2,we:2,well:3,where:2,workload:[3,6],workload_config:[4,6,7],y:5,you:[3,5],your:5},titles:["ASTRA-sim Output","Argument ${NETWORK_CONFIG}","Argument ${SYSTEM_CONFIG}","Argument ${WORKLOAD_CONFIG}","Getting Started","Installation","Running ASTRA-sim","Welcome to ASTRA-sim\u2019s documentation!"],titleterms:{argument:[1,2,3],astra:[0,6,7],convert:3,depend:5,document:7,et_convert:3,et_gener:3,execut:3,gener:3,get:4,indic:7,instal:5,network_config:1,output:0,packag:5,run:6,s:7,sim:[0,6,7],start:4,system_config:2,tabl:7,trace:3,us:3,welcom:7,workload_config:3}}) \ No newline at end of file From 7a913abbc584ebec38d6a837848ac445c15f1566 Mon Sep 17 00:00:00 2001 From: zli Date: Sun, 22 Oct 2023 13:58:04 -0400 Subject: [PATCH 2/2] updated file names --- ...im Output.rst.txt => ASTRA-sim-Output.rst} | 0 ...G}.rst.txt => Argument-NETWORK-CONFIG.rst} | 0 ...IG}.rst.txt => Argument-SYSTEM-CONFIG.rst} | 0 ...}.rst.txt => Argument-WORKLOAD-CONFIG.rst} | 0 ...ng Started.rst.txt => Getting-Started.rst} | 4 +- ...STRA-sim.rst.txt => Running-ASTRA-sim.rst} | 6 +- ...-sim Output.html => ASTRA-sim-Output.html} | 12 +-- ...FIG}.html => Argument-NETWORK-CONFIG.html} | 24 ++--- ...NFIG}.html => Argument-SYSTEM-CONFIG.html} | 24 ++--- ...IG}.html => Argument-WORKLOAD-CONFIG.html} | 24 ++--- ...ting Started.html => Getting-Started.html} | 20 ++--- pages/docs/_build/html/Installation.html | 16 ++-- ... 
ASTRA-sim.html => Running-ASTRA-sim.html} | 28 +++--- .../html/_sources/ASTRA-sim-Output.rst.txt | 19 ++++ .../_sources/Argument-NETWORK-CONFIG.rst.txt | 41 +++++++++ .../_sources/Argument-SYSTEM-CONFIG.rst.txt | 66 ++++++++++++++ .../_sources/Argument-WORKLOAD-CONFIG.rst.txt | 85 ++++++++++++++++++ .../html/_sources/Getting-Started.rst.txt | 8 ++ .../html/_sources/Running-ASTRA-sim.rst.txt | 28 ++++++ pages/docs/_build/html/_sources/index.rst.txt | 2 +- pages/docs/_build/html/genindex.html | 2 +- pages/docs/_build/html/index.html | 22 ++--- pages/docs/_build/html/objects.inv | Bin 488 -> 489 bytes pages/docs/_build/html/search.html | 2 +- pages/docs/_build/html/searchindex.js | 2 +- pages/docs/index.rst | 2 +- 26 files changed, 342 insertions(+), 95 deletions(-) rename pages/docs/{_build/html/_sources/ASTRA-sim Output.rst.txt => ASTRA-sim-Output.rst} (100%) rename pages/docs/{_build/html/_sources/Argument ${NETWORK_CONFIG}.rst.txt => Argument-NETWORK-CONFIG.rst} (100%) rename pages/docs/{_build/html/_sources/Argument ${SYSTEM_CONFIG}.rst.txt => Argument-SYSTEM-CONFIG.rst} (100%) rename pages/docs/{_build/html/_sources/Argument ${WORKLOAD_CONFIG}.rst.txt => Argument-WORKLOAD-CONFIG.rst} (100%) rename pages/docs/{_build/html/_sources/Getting Started.rst.txt => Getting-Started.rst} (61%) rename pages/docs/{_build/html/_sources/Running ASTRA-sim.rst.txt => Running-ASTRA-sim.rst} (88%) rename pages/docs/_build/html/{ASTRA-sim Output.html => ASTRA-sim-Output.html} (90%) rename pages/docs/_build/html/{Argument ${NETWORK_CONFIG}.html => Argument-NETWORK-CONFIG.html} (83%) rename pages/docs/_build/html/{Argument ${SYSTEM_CONFIG}.html => Argument-SYSTEM-CONFIG.html} (88%) rename pages/docs/_build/html/{Argument ${WORKLOAD_CONFIG}.html => Argument-WORKLOAD-CONFIG.html} (89%) rename pages/docs/_build/html/{Getting Started.html => Getting-Started.html} (86%) rename pages/docs/_build/html/{Running ASTRA-sim.html => Running-ASTRA-sim.html} (83%) create mode 100644 
pages/docs/_build/html/_sources/ASTRA-sim-Output.rst.txt create mode 100644 pages/docs/_build/html/_sources/Argument-NETWORK-CONFIG.rst.txt create mode 100644 pages/docs/_build/html/_sources/Argument-SYSTEM-CONFIG.rst.txt create mode 100644 pages/docs/_build/html/_sources/Argument-WORKLOAD-CONFIG.rst.txt create mode 100644 pages/docs/_build/html/_sources/Getting-Started.rst.txt create mode 100644 pages/docs/_build/html/_sources/Running-ASTRA-sim.rst.txt diff --git a/pages/docs/_build/html/_sources/ASTRA-sim Output.rst.txt b/pages/docs/ASTRA-sim-Output.rst similarity index 100% rename from pages/docs/_build/html/_sources/ASTRA-sim Output.rst.txt rename to pages/docs/ASTRA-sim-Output.rst diff --git a/pages/docs/_build/html/_sources/Argument ${NETWORK_CONFIG}.rst.txt b/pages/docs/Argument-NETWORK-CONFIG.rst similarity index 100% rename from pages/docs/_build/html/_sources/Argument ${NETWORK_CONFIG}.rst.txt rename to pages/docs/Argument-NETWORK-CONFIG.rst diff --git a/pages/docs/_build/html/_sources/Argument ${SYSTEM_CONFIG}.rst.txt b/pages/docs/Argument-SYSTEM-CONFIG.rst similarity index 100% rename from pages/docs/_build/html/_sources/Argument ${SYSTEM_CONFIG}.rst.txt rename to pages/docs/Argument-SYSTEM-CONFIG.rst diff --git a/pages/docs/_build/html/_sources/Argument ${WORKLOAD_CONFIG}.rst.txt b/pages/docs/Argument-WORKLOAD-CONFIG.rst similarity index 100% rename from pages/docs/_build/html/_sources/Argument ${WORKLOAD_CONFIG}.rst.txt rename to pages/docs/Argument-WORKLOAD-CONFIG.rst diff --git a/pages/docs/_build/html/_sources/Getting Started.rst.txt b/pages/docs/Getting-Started.rst similarity index 61% rename from pages/docs/_build/html/_sources/Getting Started.rst.txt rename to pages/docs/Getting-Started.rst index 41b22b2..07bd9bd 100644 --- a/pages/docs/_build/html/_sources/Getting Started.rst.txt +++ b/pages/docs/Getting-Started.rst @@ -4,5 +4,5 @@ Getting Started .. 
toctree:: Installation - Running ASTRA-sim - ASTRA-sim Output + Running-ASTRA-sim + ASTRA-sim-Output diff --git a/pages/docs/_build/html/_sources/Running ASTRA-sim.rst.txt b/pages/docs/Running-ASTRA-sim.rst similarity index 88% rename from pages/docs/_build/html/_sources/Running ASTRA-sim.rst.txt rename to pages/docs/Running-ASTRA-sim.rst index b1d2748..8375498 100644 --- a/pages/docs/_build/html/_sources/Running ASTRA-sim.rst.txt +++ b/pages/docs/Running-ASTRA-sim.rst @@ -20,9 +20,9 @@ Conduct experiments by passing the required aruguments: .. toctree:: - Argument ${WORKLOAD_CONFIG} - Argument ${SYSTEM_CONFIG} - Argument ${NETWORK_CONFIG} + Argument-WORKLOAD-CONFIG + Argument-SYSTEM-CONFIG + Argument-NETWORK-CONFIG .. note:: Additional arguments may be required based on the type of network backend. diff --git a/pages/docs/_build/html/ASTRA-sim Output.html b/pages/docs/_build/html/ASTRA-sim-Output.html similarity index 90% rename from pages/docs/_build/html/ASTRA-sim Output.html rename to pages/docs/_build/html/ASTRA-sim-Output.html index d37fb86..b90d043 100644 --- a/pages/docs/_build/html/ASTRA-sim Output.html +++ b/pages/docs/_build/html/ASTRA-sim-Output.html @@ -15,7 +15,7 @@ - + @@ -34,9 +34,9 @@ diff --git a/pages/docs/_build/html/Installation.html b/pages/docs/_build/html/Installation.html index 7847e93..58a065d 100644 --- a/pages/docs/_build/html/Installation.html +++ b/pages/docs/_build/html/Installation.html @@ -15,8 +15,8 @@ - - + + @@ -35,13 +35,13 @@ diff --git a/pages/docs/_build/html/index.html b/pages/docs/_build/html/index.html index eb51b70..0fac73d 100644 --- a/pages/docs/_build/html/index.html +++ b/pages/docs/_build/html/index.html @@ -15,7 +15,7 @@ - + @@ -34,7 +34,7 @@ @@ -70,22 +70,22 @@

      Welcome to ASTRA-sim’s documentation!
        -
      • Getting Started
          +
        • Getting Started
          • Installation
          • -
          • Running ASTRA-sim @@ -104,7 +104,7 @@

            Indices and tables - +
            diff --git a/pages/docs/_build/html/objects.inv b/pages/docs/_build/html/objects.inv index 5ca3f60d6b0c69180082052b8b7c1c0cb8393a33..1908e15b90b8663fe7e350a854e8e819c0150ef2 100644 GIT binary patch delta 379 zcmV->0fheO1L*^hcYl?V!D_-l6h-&*6;td&7gOlAyP#I2)rd)vu4FoS#L#3WBrioN zh5kl=uV2!{8J&p{rMqzNyXQXM#0$cu$I2vQeua63w8ZEkQi8I3>YaFZ8_ZIxQmqoN zh^q`FviUe%`O7CZ^yiP$NsDxT!~l&+Z$FK_*7lc02d_amo_}2;w5M@_!BPFU z)8@{1PL6)xZFCus`ZJLcY}m4fNbxp?O>&kQ7Zn}Cz`MH5F?1iSmEqNh>7gsONoh}I zLj+&L*YinC+q6x!;KaLd{vW^Ow6^1}Z7U5Tvdl2fkfi8cdx8<`lGv4I4lYXde1}m; zEW=LxECPnabwD!sSm`1803Jn0Md5;$KTLX8LO39siLtqG&J@vms%ZgC->R9 YTM9H`yLPagl^LhhR}yP{0j6eV-53G8VgLXD diff --git a/pages/docs/_build/html/search.html b/pages/docs/_build/html/search.html index 0f97556..755e7c8 100644 --- a/pages/docs/_build/html/search.html +++ b/pages/docs/_build/html/search.html @@ -35,7 +35,7 @@ diff --git a/pages/docs/_build/html/searchindex.js b/pages/docs/_build/html/searchindex.js index 96316b7..87e47b6 100644 --- a/pages/docs/_build/html/searchindex.js +++ b/pages/docs/_build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["ASTRA-sim Output","Argument ${NETWORK_CONFIG}","Argument ${SYSTEM_CONFIG}","Argument ${WORKLOAD_CONFIG}","Getting Started","Installation","Running ASTRA-sim","index"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.intersphinx":1,sphinx:56},filenames:["ASTRA-sim Output.rst","Argument ${NETWORK_CONFIG}.rst","Argument ${SYSTEM_CONFIG}.rst","Argument ${WORKLOAD_CONFIG}.rst","Getting Started.rst","Installation.rst","Running 
ASTRA-sim.rst","index.rst"],objects:{},objnames:{},objtypes:{},terms:{"0":[0,3],"1":[0,2,3,5],"10":3,"1300":0,"187442108":3,"1n":2,"1total":0,"2":[0,2,3],"25":0,"3":[0,5],"4":0,"400":0,"6":5,"61":3,"62":3,"63":3,"64":3,"646570":0,"case":3,"default":2,"function":3,"int":2,"switch":[0,1],"while":2,For:[2,5,6],It:2,One:2,The:[0,2,3],To:[3,5],accord:2,across:2,activ:[2,7],ad:0,addit:6,after:[2,5],algorithm:2,all:[2,5],allreduc:2,alltoal:2,alwai:2,an:[1,2],analyt:[3,5,6],analyticalastra:[3,6],ani:[2,3],apt:5,ar:2,argument:[4,6,7],arrai:1,arriv:2,arugu:6,assum:2,astra:[3,4,5],astra_analyt:[3,5,6],astra_sim:[1,2,3,6],avail:2,b:1,backend:[2,5,6],bandwidth:[0,1],base:[2,5,6],baselin:2,below:[0,1],big:2,bin:[3,6],binari:[2,6],build:[3,5,6],built:6,c:5,can:[1,2,3],cd:[3,5],chakra:3,chang:2,chunk:2,clock:2,clone:5,cmake:[3,5],cmakelist:3,colect:2,collect:2,com:5,command:[2,3],compil:5,complet:3,conduct:6,configur:[1,2,3,6],connect:2,constant:2,convers:3,convert:[4,6,7],correspond:2,cost:0,costmodel:0,count:[0,1],creat:[2,3],cycl:[0,2,3],dataset:2,defin:[2,3],delai:2,depend:[4,7],dev:5,develop:7,dim1:2,dim:1,dimens:[0,1,2],dimension0collective_dimension1collective_xxx_dimensionncollect:2,dimension0collectivealg_dimension1collectivealg_xxx_dimensionncollectivealg:2,dimension:2,dimesnion:2,dimn:2,direct:2,directli:2,directori:5,displai:3,divid:2,doubl:2,doublebinarytre:2,download:5,each:[1,2],eg:3,either:3,endpoint:2,equal:2,et_convert:[4,6,7],et_gener:[4,6,7],exampl:[1,2,3],except:2,execut:[2,4,5,6,7],experi:6,extern:3,factor:1,feq:2,fifo:2,file:[1,2,3],finish:[0,3],first:2,flexflow:3,follow:[2,3],format:3,found:[1,2],from:[2,5],fully_connect:3,fullyconnect:1,g:5,garnet:2,gather:2,gb:1,gcc:5,gener:[4,6,7],get:[5,7],ghz:2,git:5,github:[5,7],go:5,goe:2,gpu:1,graph_frontend:3,handl:2,have:[2,3,5],hbm:1,henc:2,here:2,hh:2,hierarch:1,higher:2,how:2,id:0,implement:2,index:[0,1],input:[1,2,3,6],input_filenam:3,input_typ:3,insid:2,instal:[3,4,7],inter:[0,2],intra:2,issu:2,j:3,json:[3,6],
latenc:1,later:2,least:2,level:1,libboost:5,libprotobuf:5,lifo:2,like:2,line:2,link:[0,1],load:2,local:0,localbwawar:2,locat:6,logic:2,m:3,mai:6,make:[3,5],mani:2,matter:2,maximum:2,mean:2,memori:[1,3,6],messag:2,most:2,multiphas:2,multipl:2,n:2,name:[1,3],network:[1,2,3,5,6],network_config:[4,6,7],next:2,nic:[0,1],no_memory_expans:[3,6],node:[0,1],nproc:3,npu:2,npu_id:3,ns:1,num_dim:3,num_npu:3,num_pass:3,number:[1,2,3],offset:0,onc:6,one:[2,3],onedirect:2,oner:2,onli:2,optim:2,option:[2,3,5],order:2,output:[4,7],output_filenam:3,packag:[1,4,7],page:7,parallel:2,paramet:2,pass:6,path:[1,2,3],path_prefix:3,pattern:2,per:[1,2],perform:2,perfrom:2,period:2,phase:2,physic:2,pip3:5,pip:5,pleas:5,pod:1,polici:2,prefer:2,prefix:3,print:0,prioriti:2,process:2,program:5,project:7,prorit:2,protobuf:5,put:1,py:3,pydot:5,python3:[3,5],pytorch:3,radix:0,receiv:2,recent:2,recurs:5,reduc:2,repres:1,requir:6,resnet50_dataparallel:3,revers:2,ring:[0,1,2],ring_doublebinarytre:2,router:1,rule:3,run:[3,4,7],s:1,same:2,sample_fully_connected_si:3,scale:1,scatter:2,scf:2,schedul:2,schema:3,script:5,search:7,second:2,see:5,seri:2,setup:3,sh:5,should:2,shown:0,sim:[3,4,5],simul:3,singl:2,size:[1,2],smallest:2,specifi:2,spend:2,split:2,start:[2,7],string:1,submodul:5,sudo:5,support:3,sure:5,sy:[0,2,3],system:[2,3,6],system_config:[4,6,7],target:5,term:2,termin:0,test:3,text:3,themi:2,thi:[2,7],tile:1,time:2,took:3,tool:3,topolog:[1,2],total:0,trace:[4,6,7],tree:2,twocompnodesdepend:3,txt:3,type:[1,2,6],uint:1,under:7,unit:1,updat:5,upon:3,us:[2,4,6,7],user:3,valu:2,version:2,we:2,well:3,where:2,workload:[3,6],workload_config:[4,6,7],y:5,you:[3,5],your:5},titles:["ASTRA-sim Output","Argument ${NETWORK_CONFIG}","Argument ${SYSTEM_CONFIG}","Argument ${WORKLOAD_CONFIG}","Getting Started","Installation","Running ASTRA-sim","Welcome to ASTRA-sim\u2019s 
documentation!"],titleterms:{argument:[1,2,3],astra:[0,6,7],convert:3,depend:5,document:7,et_convert:3,et_gener:3,execut:3,gener:3,get:4,indic:7,instal:5,network_config:1,output:0,packag:5,run:6,s:7,sim:[0,6,7],start:4,system_config:2,tabl:7,trace:3,us:3,welcom:7,workload_config:3}}) \ No newline at end of file +Search.setIndex({docnames:["ASTRA-sim-Output","Argument-NETWORK-CONFIG","Argument-SYSTEM-CONFIG","Argument-WORKLOAD-CONFIG","Getting-Started","Installation","Running-ASTRA-sim","index"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.intersphinx":1,sphinx:56},filenames:["ASTRA-sim-Output.rst","Argument-NETWORK-CONFIG.rst","Argument-SYSTEM-CONFIG.rst","Argument-WORKLOAD-CONFIG.rst","Getting-Started.rst","Installation.rst","Running-ASTRA-sim.rst","index.rst"],objects:{},objnames:{},objtypes:{},terms:{"0":[0,3],"1":[0,2,3,5],"10":3,"1300":0,"187442108":3,"1n":2,"1total":0,"2":[0,2,3],"25":0,"3":[0,5],"4":0,"400":0,"6":5,"61":3,"62":3,"63":3,"64":3,"646570":0,"case":3,"default":2,"function":3,"int":2,"switch":[0,1],"while":2,For:[2,5,6],It:2,One:2,The:[0,2,3],To:[3,5],accord:2,across:2,activ:[2,7],ad:0,addit:6,after:[2,5],algorithm:2,all:[2,5],allreduc:2,alltoal:2,alwai:2,an:[1,2],analyt:[3,5,6],analyticalastra:[3,6],ani:[2,3],apt:5,ar:2,argument:[4,6,7],arrai:1,arriv:2,arugu:6,assum:2,astra:[3,4,5],astra_analyt:[3,5,6],astra_sim:[1,2,3,6],avail:2,b:1,backend:[2,5,6],bandwidth:[0,1],base:[2,5,6],baselin:2,below:[0,1],big:2,bin:[3,6],binari:[2,6],build:[3,5,6],built:6,c:5,can:[1,2,3],cd:[3,5],chakra:3,chang:2,chunk:2,clock:2,clone:5,cmake:[3,5],cmakelist:3,colect:2,collect:2,com:5,command:[2,3],compil:5,complet:3,conduct:6,configur:[1,2,3,6],connect:2,constant:2,convers:3,convert:[4,6,7],correspond:2,cost:0,costmodel:0,count:[0,1],creat:[2
,3],cycl:[0,2,3],dataset:2,defin:[2,3],delai:2,depend:[4,7],dev:5,develop:7,dim1:2,dim:1,dimens:[0,1,2],dimension0collective_dimension1collective_xxx_dimensionncollect:2,dimension0collectivealg_dimension1collectivealg_xxx_dimensionncollectivealg:2,dimension:2,dimesnion:2,dimn:2,direct:2,directli:2,directori:5,displai:3,divid:2,doubl:2,doublebinarytre:2,download:5,each:[1,2],eg:3,either:3,endpoint:2,equal:2,et_convert:[4,6,7],et_gener:[4,6,7],exampl:[1,2,3],except:2,execut:[2,4,5,6,7],experi:6,extern:3,factor:1,feq:2,fifo:2,file:[1,2,3],finish:[0,3],first:2,flexflow:3,follow:[2,3],format:3,found:[1,2],from:[2,5],fully_connect:3,fullyconnect:1,g:5,garnet:2,gather:2,gb:1,gcc:5,gener:[4,6,7],get:[5,7],ghz:2,git:5,github:[5,7],go:5,goe:2,gpu:1,graph_frontend:3,handl:2,have:[2,3,5],hbm:1,henc:2,here:2,hh:2,hierarch:1,higher:2,how:2,id:0,implement:2,index:[0,1],input:[1,2,3,6],input_filenam:3,input_typ:3,insid:2,instal:[3,4,7],inter:[0,2],intra:2,issu:2,j:3,json:[3,6],latenc:1,later:2,least:2,level:1,libboost:5,libprotobuf:5,lifo:2,like:2,line:2,link:[0,1],load:2,local:0,localbwawar:2,locat:6,logic:2,m:3,mai:6,make:[3,5],mani:2,matter:2,maximum:2,mean:2,memori:[1,3,6],messag:2,most:2,multiphas:2,multipl:2,n:2,name:[1,3],network:[1,2,3,5,6],network_config:[4,6,7],next:2,nic:[0,1],no_memory_expans:[3,6],node:[0,1],nproc:3,npu:2,npu_id:3,ns:1,num_dim:3,num_npu:3,num_pass:3,number:[1,2,3],offset:0,onc:6,one:[2,3],onedirect:2,oner:2,onli:2,optim:2,option:[2,3,5],order:2,output:[4,7],output_filenam:3,packag:[1,4,7],page:7,parallel:2,paramet:2,pass:6,path:[1,2,3],path_prefix:3,pattern:2,per:[1,2],perform:2,perfrom:2,period:2,phase:2,physic:2,pip3:5,pip:5,pleas:5,pod:1,polici:2,prefer:2,prefix:3,print:0,prioriti:2,process:2,program:5,project:7,prorit:2,protobuf:5,put:1,py:3,pydot:5,python3:[3,5],pytorch:3,radix:0,receiv:2,recent:2,recurs:5,reduc:2,repres:1,requir:6,resnet50_dataparallel:3,revers:2,ring:[0,1,2],ring_doublebinarytre:2,router:1,rule:3,run:[3,4,7],s:1,same:2,sample_fu
lly_connected_si:3,scale:1,scatter:2,scf:2,schedul:2,schema:3,script:5,search:7,second:2,see:5,seri:2,setup:3,sh:5,should:2,shown:0,sim:[3,4,5],simul:3,singl:2,size:[1,2],smallest:2,specifi:2,spend:2,split:2,start:[2,7],string:1,submodul:5,sudo:5,support:3,sure:5,sy:[0,2,3],system:[2,3,6],system_config:[4,6,7],target:5,term:2,termin:0,test:3,text:3,themi:2,thi:[2,7],tile:1,time:2,took:3,tool:3,topolog:[1,2],total:0,trace:[4,6,7],tree:2,twocompnodesdepend:3,txt:3,type:[1,2,6],uint:1,under:7,unit:1,updat:5,upon:3,us:[2,4,6,7],user:3,valu:2,version:2,we:2,well:3,where:2,workload:[3,6],workload_config:[4,6,7],y:5,you:[3,5],your:5},titles:["ASTRA-sim Output","Argument ${NETWORK_CONFIG}","Argument ${SYSTEM_CONFIG}","Argument ${WORKLOAD_CONFIG}","Getting Started","Installation","Running ASTRA-sim","Welcome to ASTRA-sim\u2019s documentation!"],titleterms:{argument:[1,2,3],astra:[0,6,7],convert:3,depend:5,document:7,et_convert:3,et_gener:3,execut:3,gener:3,get:4,indic:7,instal:5,network_config:1,output:0,packag:5,run:6,s:7,sim:[0,6,7],start:4,system_config:2,tabl:7,trace:3,us:3,welcom:7,workload_config:3}}) \ No newline at end of file diff --git a/pages/docs/index.rst b/pages/docs/index.rst index 077e375..bcc0401 100644 --- a/pages/docs/index.rst +++ b/pages/docs/index.rst @@ -9,7 +9,7 @@ Welcome to ASTRA-sim's documentation! .. toctree:: - Getting Started + Getting-Started Indices and tables ==================