
Commit d727f63

gokulavasan authored and facebook-github-bot committed

Address CI test failures in pytorch/data (meta-pytorch#1219)

Summary:

### Changes

- Remove torchvision tests from domain_ci. These tests are failing in CI, and torchvision stopped maintenance of that part of the codebase (https://github.com/pytorch/vision/blob/main/.github/workflows/prototype-tests-linux-gpu.yml#L47-L49)
- Change AIStore branch name
- Run pre-commit hook

Pull Request resolved: meta-pytorch#1219

Reviewed By: kartikayk, huydhn, ejguan

Differential Revision: D52903377

Pulled By: gokulavasan

fbshipit-source-id: 1deaee3b88b8c062cedd718e7c268019fc2b3065

1 parent c3d1c1a commit d727f63

File tree

7 files changed: +16 −78 lines changed

.github/workflows/aistore_ci.yml

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@ jobs:
           pip3 install -r requirements.txt
           pip3 install --pre torch -f "${{ steps.pytorch_channel.outputs.value }}"
       - name: Run AIStore local deployment
-        uses: NVIDIA/aistore@master
+        uses: NVIDIA/aistore@main
       - name: Build TorchData
         run: |
           pip3 install .

.github/workflows/domain_ci.yml

Lines changed: 0 additions & 49 deletions
@@ -10,55 +10,6 @@ on:
       - gh/*/*/base

 jobs:
-  torchvision:
-    if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        os:
-          - macos-latest
-          - ubuntu-latest
-          - windows-latest
-        python-version:
-          - 3.8
-          - 3.9
-    steps:
-      - name: Setup Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install torch and torchvision from nightlies
-        run: |
-          pip install numpy networkx
-          pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
-
-      - name: Check out torchdata repository
-        uses: actions/checkout@v3
-
-      - name: Install torchdata
-        run: |
-          pip install -r requirements.txt
-          pip install .
-
-      - name: Install test requirements
-        run: pip install pytest pytest-mock scipy iopath pycocotools h5py
-
-      - name: Extract torchvision ref
-        id: torchvision
-        run: echo "ref=$(python -c 'import torchvision; print(torchvision.version.git_version)')" >> $GITHUB_OUTPUT
-
-      - name: Check out torchvision repository
-        uses: actions/checkout@v3
-        with:
-          repository: pytorch/vision
-          ref: ${{ steps.torchvision.outputs.ref }}
-          path: vision
-
-      - name: Run torchvision builtin datasets tests
-        run: pytest --no-header -v vision/test/test_prototype_datasets_builtin.py
-
   torchtext:
     if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ${{ matrix.os }}
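The deleted torchvision job above fanned out over a build matrix of three operating systems and two Python versions. As a hedged illustration (not part of the commit itself), the matrix expansion GitHub Actions performs can be sketched as a cross product, which shows that removing the job drops six CI runs per workflow trigger:

```python
# Hypothetical sketch: expand the deleted job's os x python-version matrix
# the way GitHub Actions would, to count the CI jobs this commit removes.
from itertools import product

oses = ["macos-latest", "ubuntu-latest", "windows-latest"]
python_versions = ["3.8", "3.9"]

# One job per (os, python-version) combination.
jobs = [f"torchvision ({os_}, {py})" for os_, py in product(oses, python_versions)]
print(len(jobs))  # 6 jobs removed from each workflow run
```

With `fail-fast: false`, all six of those jobs ran to completion even after one failed, so a single broken upstream test surfaced as several red checks per run.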

README.md

Lines changed: 5 additions & 1 deletion
@@ -4,7 +4,11 @@
 [**What are DataPipes?**](#what-are-datapipes) | [**Beta Usage and Feedback**](#beta-usage-and-feedback) |
 [**Contributing**](#contributing) | [**Future Plans**](#future-plans)

-**:warning: As of July 2023, we have paused active development on TorchData and have paused new releases. We have learnt a lot from building it and hearing from users, but also believe we need to re-evaluate the technical design and approach given how much the industry has changed since we began the project. During the rest of 2023 we will be re-evaluating our plans in this space. Please reach out if you suggestions or comments (please use [#1196](https://github.com/pytorch/data/issues/1196) for feedback).**
+**:warning: As of July 2023, we have paused active development on TorchData and have paused new releases. We have learnt
+a lot from building it and hearing from users, but also believe we need to re-evaluate the technical design and approach
+given how much the industry has changed since we began the project. During the rest of 2023 we will be re-evaluating our
+plans in this space. Please reach out if you suggestions or comments (please use
+[#1196](https://github.com/pytorch/data/issues/1196) for feedback).**

 `torchdata` is a library of common modular data loading primitives for easily constructing flexible and performant data
 pipelines.

examples/dataloader2/train_loop.py

Lines changed: 2 additions & 5 deletions
@@ -26,7 +26,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Simple model forward function
         """
-        return self.a + self.b * x + self.c * x**2 + self.d * x**3
+        return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3


 def main() -> None:
@@ -69,10 +69,7 @@ def main() -> None:
             running_loss += loss.item()
             # Print the loss every 2000 mini-batches.
             if step % 2000 == 1999:
-                print(
-                    "[epoch: %d, %5d] loss: %.3f"
-                    % (epoch + 1, step + 1, running_loss / 2000)
-                )
+                print("[epoch: %d, %5d] loss: %.3f" % (epoch + 1, step + 1, running_loss / 2000))
                 running_loss = 0.0

     print("Finished Training")
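The changes in this file are purely stylistic (applied by the pre-commit hook): `x**2` and `x ** 2` are the same operator, so the model's output is unchanged. A standalone sketch with plain floats in place of the example's torch tensors (names here are illustrative, not from the commit):

```python
# Dependency-free sketch of the example model's cubic forward pass,
# written with the post-reformat spacing. Numerically identical to the
# pre-reformat `x**2` / `x**3` spelling.
def forward(a: float, b: float, c: float, d: float, x: float) -> float:
    # a + b*x + c*x^2 + d*x^3
    return a + b * x + c * x ** 2 + d * x ** 3

print(forward(1.0, 1.0, 1.0, 1.0, 2.0))  # 1 + 2 + 4 + 8 = 15.0
```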

examples/dataloader2/train_loop_distributed_reading_service.py

Lines changed: 2 additions & 5 deletions
@@ -31,7 +31,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Simple model forward function
         """
-        return self.a + self.b * x + self.c * x**2 + self.d * x**3
+        return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3


 def main() -> None:
@@ -84,10 +84,7 @@ def main() -> None:

             running_loss += loss.item()
             if step % 2000 == 1999:
-                print(
-                    "[epoch: %d, %5d] loss: %.3f"
-                    % (epoch + 1, step + 1, running_loss / 2000)
-                )
+                print("[epoch: %d, %5d] loss: %.3f" % (epoch + 1, step + 1, running_loss / 2000))
                 running_loss = 0.0

     print("Finished Training")

examples/dataloader2/train_loop_reading_service.py

Lines changed: 2 additions & 5 deletions
@@ -27,7 +27,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Simple model forward function
         """
-        return self.a + self.b * x + self.c * x**2 + self.d * x**3
+        return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3


 def main() -> None:
@@ -63,10 +63,7 @@ def main() -> None:

             running_loss += loss.item()
             if step % 2000 == 1999:
-                print(
-                    "[epoch: %d, %5d] loss: %.3f"
-                    % (epoch + 1, step + 1, running_loss / 2000)
-                )
+                print("[epoch: %d, %5d] loss: %.3f" % (epoch + 1, step + 1, running_loss / 2000))
                 running_loss = 0.0

     print("Finished Training")
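All three train-loop examples share the same logging cadence that the reformat above collapses onto one line: with 0-indexed steps, `step % 2000 == 1999` fires on steps 1999, 3999, 5999, ..., i.e. after every 2000th mini-batch, at which point the running loss is averaged and reset. A minimal sketch of just that cadence (variable names are illustrative):

```python
# Sketch of the examples' "print every 2000 mini-batches" condition:
# steps are 0-indexed, so the 2000th batch of each window is step 1999.
logged_steps = [step for step in range(6000) if step % 2000 == 1999]
print(logged_steps)  # [1999, 3999, 5999]
```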

examples/dataloader2/train_loop_torchtext.py

Lines changed: 4 additions & 12 deletions
@@ -24,9 +24,7 @@


 XLMR_VOCAB_PATH = r"https://download.pytorch.org/models/text/xlmr.vocab.pt"
-XLMR_SPM_MODEL_PATH = (
-    r"https://download.pytorch.org/models/text/xlmr.sentencepiece.bpe.model"
-)
+XLMR_SPM_MODEL_PATH = r"https://download.pytorch.org/models/text/xlmr.sentencepiece.bpe.model"

 DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

@@ -70,9 +68,7 @@ def evaluate() -> None:
     counter = 0
     with torch.no_grad():
         for batch in eval_dataloader:
-            input = F.to_tensor(batch["token_ids"], padding_value=PADDING_IDX).to(
-                DEVICE
-            )
+            input = F.to_tensor(batch["token_ids"], padding_value=PADDING_IDX).to(DEVICE)
             target = torch.tensor(batch["target"]).to(DEVICE)
             loss, predictions = eval_step(input, target)
             total_loss += loss
@@ -101,9 +97,7 @@ def main() -> None:
     eval_dataloader = DataLoader2(datapipe=eval_datapipe)
     print("Created eval dataloader")

-    classifier_head = torchtext.models.RobertaClassificationHead(
-        num_classes=NUM_CLASSES, input_dim=INPUT_DIM
-    )
+    classifier_head = torchtext.models.RobertaClassificationHead(num_classes=NUM_CLASSES, input_dim=INPUT_DIM)
     model = torchtext.models.XLMR_BASE_ENCODER.get_model(head=classifier_head)
     model.to(DEVICE)

@@ -112,9 +106,7 @@ def main() -> None:

     for epoch in range(NUM_EPOCHS):
         for step, batch in enumerate(train_dataloader):
-            input = F.to_tensor(batch["token_ids"], padding_value=PADDING_IDX).to(
-                DEVICE
-            )
+            input = F.to_tensor(batch["token_ids"], padding_value=PADDING_IDX).to(DEVICE)
             target = torch.tensor(batch["target"]).to(DEVICE)
             train_step(input, target)
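The `F.to_tensor(batch["token_ids"], padding_value=PADDING_IDX)` calls reformatted above batch variable-length token-id sequences by padding them to a common length. As a hedged, dependency-free sketch of that padding behavior (the real torchtext API returns a `torch.Tensor`; `pad_batch` and the `PADDING_IDX` value here are illustrative assumptions, not the library's implementation):

```python
# Hypothetical pure-Python sketch of padding a batch of token-id lists
# to the longest sequence, mimicking the role of padding_value in the
# example's F.to_tensor call. Lists stand in for tensors.
from typing import List

PADDING_IDX = 1  # assumed value for illustration only


def pad_batch(batch: List[List[int]], padding_value: int) -> List[List[int]]:
    """Right-pad every sequence in the batch to the batch's max length."""
    max_len = max(len(seq) for seq in batch)
    return [seq + [padding_value] * (max_len - len(seq)) for seq in batch]


print(pad_batch([[5, 6, 7], [8]], PADDING_IDX))  # [[5, 6, 7], [8, 1, 1]]
```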
