nivir
diff --git a/‎DGLPyTorch/DrugDiscovery/RoseTTAFold/Dockerfile‎
Lines changed: 73 additions & 0 deletions b/‎DGLPyTorch/DrugDiscovery/RoseTTAFold/Dockerfile‎
Lines changed: 73 additions & 0 deletions
diff --git a/‎DGLPyTorch/DrugDiscovery/RoseTTAFold/LICENSE‎
Lines changed: 21 additions & 0 deletions b/‎DGLPyTorch/DrugDiscovery/RoseTTAFold/LICENSE‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎DGLPyTorch/DrugDiscovery/RoseTTAFold/README-ROSETTAFOLD.md‎
Lines changed: 94 additions & 0 deletions b/‎DGLPyTorch/DrugDiscovery/RoseTTAFold/README-ROSETTAFOLD.md‎
Lines changed: 94 additions & 0 deletions
diff --git a/‎DGLPyTorch/DrugDiscovery/RoseTTAFold/README.md‎
Lines changed: 138 additions & 0 deletions b/‎DGLPyTorch/DrugDiscovery/RoseTTAFold/README.md‎
Lines changed: 138 additions & 0 deletions
@@ -0,0 +1,73 @@
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Base image shared by both stages; override with --build-arg FROM_IMAGE_NAME=...
+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:21.09-py3
+
+# Stage 1: build DGL v0.7.0 from source with CUDA and FP16 enabled.
+FROM ${FROM_IMAGE_NAME} AS dgl_builder
+# Set only in this builder stage; the final stage starts again from the base image.
+ENV DEBIAN_FRONTEND=noninteractive
+# --no-install-recommends keeps optional packages out of the build stage;
+# ca-certificates is listed explicitly because the clone below uses https.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        cmake \
+        git \
+        make \
+        python3-dev \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /dgl
+RUN git clone --branch v0.7.0 --recurse-submodules --depth 1 https://github.com/dmlc/dgl.git .
+# Replace the architecture list "35 50 60 70" with "60 70 80" in DGL's CUDA CMake module.
+RUN sed -i 's/"35 50 60 70"/"60 70 80"/g' cmake/modules/CUDA.cmake
+# Absolute path; the former relative "build" resolved to this same directory.
+WORKDIR /dgl/build
+RUN cmake -DUSE_CUDA=ON -DUSE_FP16=ON ..
+RUN make -j8
+
+
+# Stage 2: the image that is actually shipped.
+FROM ${FROM_IMAGE_NAME}
+
+# VERY IMPORTANT, DO NOT REMOVE:
+# NOTE(review): presumably forces the extension packages below to build with
+# CUDA support even though no GPU is visible during `docker build` -- confirm
+# against the PyTorch Geometric installation notes.
+ENV FORCE_CUDA=1
+# Installed one package per layer, in the original order.
+# --no-cache-dir keeps pip's download/build cache out of the image layers.
+# NOTE(review): versions are unpinned, so rebuilds may pick up newer releases.
+RUN pip install -v --no-cache-dir torch-geometric
+RUN pip install -v --no-cache-dir torch-scatter
+RUN pip install -v --no-cache-dir torch-sparse
+RUN pip install -v --no-cache-dir torch-cluster
+RUN pip install -v --no-cache-dir torch-spline-conv
+
+
+# Bring in the DGL tree produced by the dgl_builder stage, install its Python
+# package, then delete the copied tree.
+COPY --from=dgl_builder /dgl ./dgl
+RUN cd dgl/python \
+    && python setup.py install \
+    && cd ../.. \
+    && rm -rf dgl
+# Select PyTorch as the DGL backend.
+ENV DGLBACKEND=pytorch
+# Disabled alternative: install a prebuilt DGL wheel instead of the source build.
+#RUN pip install dgl-cu111 -f https://data.dgl.ai/wheels/repo.html
+
+
+# HH-Suite: clone the sources, then configure, compile and install them from a
+# separate build directory.
+RUN git clone https://github.com/soedinglab/hh-suite.git \
+    && mkdir -p hh-suite/build
+# NOTE(review): relative path; it resolves against the base image's working
+# directory, which is not set anywhere in this file.
+WORKDIR hh-suite/build
+RUN cmake .. \
+    && make \
+    && make install
+
+
+# PSIPRED
+# Downloads CS-BLAST 2.2.3, legacy BLAST 2.2.26 and PSIPRED 4.02 into
+# /workspace; each archive is unpacked and deleted within its own layer.
+WORKDIR /workspace
+RUN wget http://wwwuser.gwdg.de/~compbiol/data/csblast/releases/csblast-2.2.3_linux64.tar.gz -O csblast-2.2.3.tar.gz \
+    && mkdir -p csblast-2.2.3 \
+    && tar xf csblast-2.2.3.tar.gz -C csblast-2.2.3 --strip-components=1 \
+    && rm csblast-2.2.3.tar.gz
+
+RUN wget https://ftp.ncbi.nlm.nih.gov/blast/executables/legacy.NOTSUPPORTED/2.2.26/blast-2.2.26-x64-linux.tar.gz \
+    && tar xf blast-2.2.26-x64-linux.tar.gz \
+    && rm blast-2.2.26-x64-linux.tar.gz
+
+RUN wget http://bioinfadmin.cs.ucl.ac.uk/downloads/psipred/psipred.4.02.tar.gz \
+    && tar xf psipred.4.02.tar.gz \
+    && rm psipred.4.02.tar.gz
+
+
+# Copy the build context into the image. COPY replaces ADD because only a
+# plain directory copy is needed (no URL fetch, no archive extraction).
+COPY . /workspace/rf
+WORKDIR /workspace/rf
+
+# Fetch and unpack the lddt archive; the zip is deleted in the same layer so
+# it does not remain in the image.
+RUN wget https://openstructure.org/static/lddt-linux.zip -O lddt.zip \
+    && unzip -d lddt -j lddt.zip \
+    && rm lddt.zip
+
+# --no-cache-dir keeps pip's download cache out of the image layers.
+RUN pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 RosettaCommons
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,94 @@
+# *RoseTTAFold* 
+This package contains deep learning models and related scripts to run RoseTTAFold.  
+This repository is the official implementation of RoseTTAFold: Accurate prediction of protein structures and interactions using a 3-track network.
+
+## Installation
+
+1. Clone the package
+```
+git clone https://github.com/RosettaCommons/RoseTTAFold.git
+cd RoseTTAFold
+```
+
+2. Create conda environment using `RoseTTAFold-linux.yml` file and `folding-linux.yml` file. The latter is required to run a pyrosetta version only (run_pyrosetta_ver.sh).
+```
+# create conda environment for RoseTTAFold
+#   If your NVIDIA driver is compatible with CUDA 11
+conda env create -f RoseTTAFold-linux.yml
+#   If not (but compatible with cuda10)
+conda env create -f RoseTTAFold-linux-cu101.yml
+
+# create conda environment for pyRosetta folding & running DeepAccNet
+conda env create -f folding-linux.yml
+```
+
+3. Download network weights (under Rosetta-DL Software license -- please see below)  
+While the code is licensed under the MIT License, the trained weights and data for RoseTTAFold are made available for non-commercial use only under the terms of the Rosetta-DL Software license. You can find details at https://files.ipd.uw.edu/pub/RoseTTAFold/Rosetta-DL_LICENSE.txt
+
+```
+wget https://files.ipd.uw.edu/pub/RoseTTAFold/weights.tar.gz
+tar xfz weights.tar.gz
+```
+
+4. Download and install third-party software.
+```
+./install_dependencies.sh
+```
+
+5. Download sequence and structure databases
+```
+# uniref30 [46G]
+wget http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz
+mkdir -p UniRef30_2020_06
+tar xfz UniRef30_2020_06_hhsuite.tar.gz -C ./UniRef30_2020_06
+
+# BFD [272G]
+wget https://bfd.mmseqs.com/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz
+mkdir -p bfd
+tar xfz bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz -C ./bfd
+
+# structure templates (including *_a3m.ffdata, *_a3m.ffindex) [over 100G]
+wget https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz
+tar xfz pdb100_2021Mar03.tar.gz
+# for CASP14 benchmarks, we used this one: https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2020Mar11.tar.gz
+```
+
+6. Obtain a [PyRosetta licence](https://els2.comotion.uw.edu/product/pyrosetta) and install the package in the newly created `folding` conda environment ([link](http://www.pyrosetta.org/downloads)).
+
+## Usage
+
+```
+# For monomer structure prediction
+cd example
+../run_[pyrosetta, e2e]_ver.sh input.fa .
+
+# For complex modeling
+# please see README file under example/complex_modeling/README for details.
+python network/predict_complex.py -i paired.a3m -o complex -Ls 218 310 
+```
+
+## Expected outputs
+For the pyrosetta version, the user will get five final models with the estimated CA RMS error in the B-factor column (model/model_[1-5].crderr.pdb).  
+For the end-to-end version, there will be a single PDB output with the estimated residue-wise CA-lDDT in the B-factor column (t000_.e2e.pdb).
+
+## FAQ
+1. Segmentation fault while running hhblits/hhsearch  
+For easy install, we used a statically compiled version of hhsuite (installed through conda). Currently, we're not sure what exactly causes segmentation fault error in some cases, but we found that it might be resolved if you compile hhsuite from source and use this compiled version instead of conda version. For installation of hhsuite, please see [here](https://github.com/soedinglab/hh-suite).
+
+2. Submitting jobs to computing nodes  
+The modeling pipeline provided here (run_pyrosetta_ver.sh/run_e2e_ver.sh) is a guideline showing how RoseTTAFold works. For more efficient use of computing resources, you might want to modify the provided bash script to submit separate jobs with proper dependencies for each of the steps (more CPUs/memory for hhblits/hhsearch, using GPUs only for running the networks, etc.).
+
+## Links:
+
+* [Robetta server](https://robetta.bakerlab.org/) (RoseTTAFold option)
+* [RoseTTAFold models for CASP14 targets](https://files.ipd.uw.edu/pub/RoseTTAFold/casp14_models.tar.gz) [input MSA and hhsearch files are included]
+
+## Credit to performer-pytorch and SE(3)-Transformer codes
+The code in network/performer_pytorch.py is strongly based on [this repo](https://github.com/lucidrains/performer-pytorch), which is a PyTorch implementation of the [Performer architecture](https://arxiv.org/abs/2009.14794).
+The code in network/equivariant_attention is from the original SE(3)-Transformer [repo](https://github.com/FabianFuchsML/se3-transformer-public), which accompanies [the paper](https://arxiv.org/abs/2006.10503) 'SE(3)-Transformers: 3D Roto-Translation Equivariant Attention Networks' by Fuchs et al.
+
+
+## References
+
+M Baek, et al., Accurate prediction of protein structures and interactions using a 3-track network, bioRxiv (2021). [link](https://www.biorxiv.org/content/10.1101/2021.06.14.448402v1)
+
@@ -0,0 +1,138 @@
+# RoseTTAFold for PyTorch
+
+This repository provides a script to run inference using the RoseTTAFold model. The content of this repository is tested and maintained by NVIDIA.
+
+## Table Of Contents
+
+- [Model overview](#model-overview)
+    * [Model architecture](#model-architecture)
+- [Setup](#setup)
+    * [Requirements](#requirements)
+- [Quick Start Guide](#quick-start-guide) 
+- [Release notes](#release-notes)
+    * [Changelog](#changelog)
+    * [Known issues](#known-issues)
+
+
+
+## Model overview
+
+RoseTTAFold is a model designed to predict an accurate protein structure from its amino acid sequence. This model is 
+based on [Accurate prediction of protein structures and interactions using a 3-track network](https://www.biorxiv.org/content/10.1101/2021.06.14.448402v1) by Minkyung Baek et al.
+
+This implementation is a dockerized version of the official [RoseTTAFold repository](https://github.com/RosettaCommons/RoseTTAFold/).
+Here you can find the [original RoseTTAFold guide](README-ROSETTAFOLD.md).
+
+### Model architecture
+
+The RoseTTAFold model is based on a 3-track architecture fusing 1D, 2D, and 3D information about the protein structure. 
+All information is exchanged between tracks to learn the sequence and coordinate patterns at the same time. The final prediction 
+is refined using an SE(3)-Transformer.   
+
+<img src="images/NetworkArchitecture.jpg" width="900"/>
+    
+*Figure 1: The RoseTTAFold architecture. Image comes from the [original paper](https://www.biorxiv.org/content/10.1101/2021.06.14.448402v1).*
+
+## Setup
+
+The following section lists the requirements that you need to meet in order to run inference using the RoseTTAFold model.
+
+### Requirements
+
+This repository contains a Dockerfile that extends the PyTorch NGC container and encapsulates necessary dependencies. Aside from these dependencies, ensure you have the following components:
+- [NVIDIA Docker](https://github.com/NVIDIA/nvidia-docker)
+- PyTorch 21.09-py3 NGC container
+- Supported GPUs:
+  - [NVIDIA Volta architecture](https://www.nvidia.com/en-us/data-center/volta-gpu-architecture/)
+  - [NVIDIA Turing architecture](https://www.nvidia.com/en-us/design-visualization/technologies/turing-architecture/)
+  - [NVIDIA Ampere architecture](https://www.nvidia.com/en-us/data-center/nvidia-ampere-gpu-architecture/)
+
+For more information about how to get started with NGC containers, refer to the following sections from the NVIDIA GPU Cloud Documentation and the Deep Learning Documentation:
+- [Getting Started Using NVIDIA GPU Cloud](https://docs.nvidia.com/ngc/ngc-getting-started-guide/index.html)
+- [Accessing And Pulling From The NGC Container Registry](https://docs.nvidia.com/deeplearning/frameworks/user-guide/index.html#accessing_registry)
+- [Running PyTorch](https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/running.html#running)
+  
+For those unable to use the PyTorch NGC container, to set up the required environment or create your own container, refer to the versioned [NVIDIA Container Support Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html).
+
+In addition, 1 TB of disk space is required to unpack the required databases.
+
+## Quick Start Guide
+
+To run inference using the RoseTTAFold model, perform the following steps using the default parameters.
+
+1. Clone the repository.
+    ```
+    git clone https://github.com/NVIDIA/DeepLearningExamples
+    cd DeepLearningExamples/DGLPyTorch/DrugDiscovery/RoseTTAFold
+    ```
+
+2. Download the pre-trained weights and databases needed for inference.
+    The following command downloads the pre-trained weights and two databases needed to create the derived features used as input to the model.
+    The script will download the `UniRef30` (~50 GB) and `pdb100_2021Mar03` (~115 GB) databases, which might take a considerable amount 
+    of time. Additionally, unpacking those databases requires approximately 1 TB of free disk space.
+
+    By default, the data will be downloaded to `./weights` and `./databases` folders in the current directory.
+    ```
+    bash scripts/download_databases.sh
+    ```
+    If you would like to specify the download location you can pass the following parameters    
+    ```
+    bash scripts/download_databases.sh PATH-TO-WEIGHTS PATH-TO-DATABASES
+    ```    
+
+3. Build the RoseTTAFold PyTorch NGC container. This step builds the PyTorch dependencies on your machine and can take between 30 minutes and 1 hour to complete. 
+    ```
+    docker build -t rosettafold .
+    ```
+
+4. Start an interactive session in the NGC container to run inference.
+    
+    The following command launches the container and mounts the `PATH-TO-WEIGHTS` directory as a volume to the `/weights` directory in the container, the `PATH-TO-DATABASES` directory as a volume to the `/databases` directory in the container, and the `./results` directory to the `/results` directory in the container.
+    ```
+    mkdir data results
+    docker run --ipc=host -it --rm --runtime=nvidia -p6006:6006 -v PATH-TO-WEIGHTS:/weights -v PATH-TO-DATABASES:/databases -v ${PWD}/results:/results rosettafold:latest /bin/bash
+    ```
+   
+5. Start inference/predictions.
+    
+    To run inference you have to prepare a FASTA file and pass a path to it or pass a sequence directly.
+    ```
+    python run_inference_pipeline.py [Sequence]
+    ```
+    There is an example FASTA file at `example/input.fa` for you to try. Running the inference pipeline consists of four steps:
+   1. Preparing the Multiple Sequence Alignments (MSAs)
+   2. Preparing the secondary structures
+   3. Preparing the templates
+   4. Iteratively refining the prediction
+   
+    The first three steps can take between a couple of minutes and an hour, depending on the sequence.
+    The output will be stored in the `/results` directory as an `output.e2e.pdb` file.
+
+6. Start Jupyter Notebook to run inference interactively.
+
+   To launch the application, copy the Notebook to the root folder.
+    ```
+    cp notebooks/run_inference.ipynb .
+    
+    ```
+    To start Jupyter Notebook, run:
+    ```
+    jupyter notebook run_inference.ipynb
+    ```
+    
+    For more information about Jupyter Notebook, refer to the Jupyter Notebook documentation.
+
+
+## Release notes
+
+### Changelog
+
+October 2021
+- Initial release
+
+### Known issues
+
+There are no known issues with this model.
+
+
+