Skip to content

Commit b31d091

Browse files
committed
[Electra/TF2] Adding new model
1 parent 1803824 commit b31d091

38 files changed

Lines changed: 12623 additions & 0 deletions
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# Initially taken from GitHub's Python gitignore file
2+
3+
# Byte-compiled / optimized / DLL files
4+
__pycache__/
5+
*.py[cod]
6+
*$py.class
7+
8+
# C extensions
9+
*.so
10+
11+
# Data, checkpoints, and results
12+
data/*/*/
13+
data/*/*.zip
14+
checkpoints/
15+
results
16+
results/*
17+
18+
# Editor
19+
.idea
20+
.idea/*
21+
22+
# Distribution / packaging
23+
.Python
24+
build/
25+
develop-eggs/
26+
dist/
27+
downloads/
28+
eggs/
29+
.eggs/
30+
lib/
31+
lib64/
32+
parts/
33+
sdist/
34+
var/
35+
wheels/
36+
*.egg-info/
37+
.installed.cfg
38+
*.egg
39+
MANIFEST
40+
41+
# PyInstaller
42+
# Usually these files are written by a python script from a template
43+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
44+
*.manifest
45+
*.spec
46+
47+
# Installer logs
48+
pip-log.txt
49+
pip-delete-this-directory.txt
50+
51+
# Unit test / coverage reports
52+
htmlcov/
53+
.tox/
54+
.nox/
55+
.coverage
56+
.coverage.*
57+
.cache
58+
nosetests.xml
59+
coverage.xml
60+
*.cover
61+
.hypothesis/
62+
.pytest_cache/
63+
64+
# Translations
65+
*.mo
66+
*.pot
67+
68+
# Django stuff:
69+
*.log
70+
local_settings.py
71+
db.sqlite3
72+
73+
# Flask stuff:
74+
instance/
75+
.webassets-cache
76+
77+
# Scrapy stuff:
78+
.scrapy
79+
80+
# Sphinx documentation
81+
docs/_build/
82+
83+
# PyBuilder
84+
target/
85+
86+
# Jupyter Notebook
87+
.ipynb_checkpoints
88+
89+
# IPython
90+
profile_default/
91+
ipython_config.py
92+
93+
# pyenv
94+
.python-version
95+
96+
# celery beat schedule file
97+
celerybeat-schedule
98+
99+
# SageMath parsed files
100+
*.sage.py
101+
102+
# Environments
103+
.env
104+
.venv
105+
env/
106+
venv/
107+
ENV/
108+
env.bak/
109+
venv.bak/
110+
111+
# Spyder project settings
112+
.spyderproject
113+
.spyproject
114+
115+
# Rope project settings
116+
.ropeproject
117+
118+
# mkdocs documentation
119+
/site
120+
121+
# mypy
122+
.mypy_cache/
123+
.dmypy.json
124+
dmypy.json
125+
126+
# Pyre type checker
127+
.pyre/
128+
129+
# vscode
130+
.vscode
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
# Base image for the build; override with `--build-arg FROM_IMAGE_NAME=<image>`.
# Uses the `nvcr.io/nvidia/` registry namespace (the same one the commented-out
# tensorrtserver image below uses) instead of the `nvidian` namespace.
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/tensorflow:20.06-tf2-py3
15+
#FROM gitlab-master.nvidia.com:5005/dl/dgx/tensorrtserver:master-py3.1164446-client as trt
16+
#FROM nvcr.io/nvidia/tensorrtserver:20.03-py3-clientsdk as trt
17+
FROM ${FROM_IMAGE_NAME}
18+
RUN apt-get update && apt-get install -y pbzip2 pv bzip2 cabextract
19+
20+
# Exported to every later build step and to containers started from this image.
# Written in the `ENV key=value` form; the whitespace-separated form is legacy syntax.
ENV DATA_PREP_WORKING_DIR=/workspace/electra/data
21+
22+
WORKDIR /workspace
23+
RUN git clone https://github.com/attardi/wikiextractor.git
24+
RUN git clone https://github.com/soskek/bookcorpus.git
25+
26+
# Copy the perf_client over
27+
#COPY --from=trt /workspace/install/ /workspace/install/
28+
#ENV LD_LIBRARY_PATH /workspace/install/lib:${LD_LIBRARY_PATH}
29+
30+
# Install trt python api
31+
#RUN pip install /workspace/install/python/tensorrtserver-1.*-py3-none-linux_x86_64.whl
32+
33+
WORKDIR /workspace/electra
34+
RUN pip install --upgrade --no-cache-dir pip \
35+
&& pip install --no-cache-dir \
36+
tqdm boto3 requests six ipdb h5py html2text nltk progressbar filelock tokenizers==0.7.0 \
37+
git+https://github.com/NVIDIA/dllogger
38+
39+
# Refresh the package index in the same layer as the install so this step does
# not depend on (possibly stale, cached) lists left behind by an earlier layer,
# then drop the lists to keep the layer small.
RUN apt-get update \
 && apt-get install -y iputils-ping \
 && rm -rf /var/lib/apt/lists/*
40+
COPY . .

TensorFlow2/LanguageModeling/ELECTRA/README.md

Lines changed: 607 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)