From 09422cad464298f150068c3c7b00bfc6e859ccbb Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Sat, 11 Jun 2022 21:30:20 -0300 Subject: [PATCH 1/7] initial testable commit --- adk/ADK.py | 23 ++++++++++++++- adk/mlops.py | 36 +++++++++++++++++++++++ adk/modeldata.py | 24 ++++++++++++--- tests/manifests/mlops_model_manifest.json | 9 ++++++ 4 files changed, 87 insertions(+), 5 deletions(-) create mode 100644 adk/mlops.py create mode 100644 tests/manifests/mlops_model_manifest.json diff --git a/adk/ADK.py b/adk/ADK.py index 10c0222..5994767 100644 --- a/adk/ADK.py +++ b/adk/ADK.py @@ -3,6 +3,10 @@ import os import sys import Algorithmia +import yaml +import os +import subprocess + from adk.io import create_exception, format_data, format_response from adk.modeldata import ModelData @@ -92,7 +96,24 @@ def process_local(self, local_payload, pprint): result = self.apply(local_payload) self.write_to_pipe(result, pprint=pprint) - def init(self, local_payload=None, pprint=print): + def mlops_initialize(self): + os.environ["MLOPS_SPOOLER_TYPE"] = "FILESYSTEM" + os.environ["MLOPS_FILESYSTEM_DIRECTORY"] = self.mlops_spool_dir + with open(f'{agents_dir}/conf/mlops.agent.conf.yaml') as f: + documents = yaml.load(f, Loader=yaml.FullLoader) + documents['mlopsUrl'] = DATAROBOT_ENDPOINT + documents['apiToken'] = DATAROBOT_API_TOKEN + with open(f'{agents_dir}/conf/mlops.agent.conf.yaml', 'w') as f: + yaml.dump(documents, f) + subprocess.call(f'{agents_dir}/bin/start-agent.sh') + check = subprocess.Popen([f'{agents_dir}/bin/status-agent.sh'], stdout=subprocess.PIPE) + check.terminate() + + + + def init(self, local_payload=None, pprint=print, mlops=False): + if mlops and not self.is_local: + self.mlops_initialize() self.load() if self.is_local and local_payload is not None: if self.loading_exception: diff --git a/adk/mlops.py b/adk/mlops.py new file mode 100644 index 0000000..6ac1dc9 --- /dev/null +++ b/adk/mlops.py @@ -0,0 +1,36 @@ +import yaml 
+import os +import subprocess + + +class MLOps(Object): + def __init__(self, endpoint, api_token, model_id, deployment_id): + self.token = api_token + self.endpoint = endpoint + self.model_id = model_id + self.deployment_id = deployment_id + self.spool_dir = "/tmp/ta" + self.agent_dir = "/opt/mlops-agent/datarobot_mlops_package-8.1.2" + + def init(self): + with open(f'{self.agent_dir}/conf/mlops.agent.conf.yaml') as f: + documents = yaml.load(f, Loader=yaml.FullLoader) + documents['mlopsUrl'] = self.endpoint + documents['apiToken'] = self.token + with open(f'{agents_dir}/conf/mlops.agent.conf.yaml', 'w') as f: + yaml.dump(documents, f) + + subprocess.call(f'{agents_dir}/bin/start-agent.sh') + check = subprocess.Popen([f'{agents_dir}/bin/status-agent.sh'], stdout=subprocess.PIPE) + output = check.stdout.readlines() + check.terminate() + if "DataRobot MLOps-Agent is running as a service." in output: + return True + else: + return False + + def env_vars(self): + os.environ['MLOPS_DEPLOYMENT_ID'] = self.deployment_id + os.environ['MLOPS_MODEL_ID'] = self.model_id + os.environ['MLOPS_SPOOLER_TYPE'] = "FILESYSTEM" + os.environ['MLOPS_FILESYSTEM_DIRECTORY'] = "/tmp/ta" \ No newline at end of file diff --git a/adk/modeldata.py b/adk/modeldata.py index 0b6acab..6c832e2 100644 --- a/adk/modeldata.py +++ b/adk/modeldata.py @@ -2,10 +2,11 @@ import json import hashlib from adk.classes import FileData +from adk.mlops import MLOps class ModelData(object): - def __init__(self, client, model_manifest_path): + def __init__(self, client, model_manifest_path, mlops=False): self.manifest_reg_path = model_manifest_path self.manifest_frozen_path = "{}.freeze".format(self.manifest_reg_path) self.manifest_data = self.get_manifest() @@ -13,6 +14,7 @@ def __init__(self, client, model_manifest_path): self.models = {} self.usr_key = "__user__" self.using_frozen = True + self.use_mlops = mlops def __getitem__(self, key): return getattr(self, self.usr_key + key) @@ -38,6 +40,8 @@ def 
available(self): def initialize(self): if self.client is None: raise Exception("Client was not defined, please define a Client when using Model Manifests.") + if self.use_mlops: + self.mlops_init() for required_file in self.manifest_data['required_files']: name = required_file['name'] source_uri = required_file['source_uri'] @@ -88,7 +92,6 @@ def find_optional_model(self, file_name): else: self.models[file_name] = FileData(real_hash, local_data_path) - def get_manifest(self): if os.path.exists(self.manifest_frozen_path): with open(self.manifest_frozen_path) as f: @@ -96,8 +99,9 @@ def get_manifest(self): if check_lock(manifest_data): return manifest_data else: - raise Exception("Manifest FreezeFile Tamper Detected; please use the CLI and 'algo freeze' to rebuild your " - "algorithm's freeze file.") + raise Exception( + "Manifest FreezeFile Tamper Detected; please use the CLI and 'algo freeze' to rebuild your " + "algorithm's freeze file.") elif os.path.exists(self.manifest_reg_path): with open(self.manifest_reg_path) as f: manifest_data = json.load(f) @@ -106,6 +110,18 @@ def get_manifest(self): else: return None + def mlops_init(self): + mlops = self.manifest_data['mlops'] + model_id = mlops['model_id'] + deployment_id = mlops['deployment_id'] + datarobot_api_endpoint = mlops['datarobot_api_endpoint'] + + api_token = os.environ.get('DATAROBOT_MLOPS_API_TOKEN') + if api_token is None: + raise Exception("'DATAROBOT_MLOPS_API_TOKEN' environment variable not found.\nPlease ensure that you have a" + "valid API token and add it as a secret to this algorithm.") + self.mlops = MLOps(datarobot_api_endpoint, api_token, model_id, deployment_id) + def check_lock(manifest_data): expected_lock_checksum = manifest_data.get('lock_checksum') diff --git a/tests/manifests/mlops_model_manifest.json b/tests/manifests/mlops_model_manifest.json new file mode 100644 index 0000000..0d85f61 --- /dev/null +++ b/tests/manifests/mlops_model_manifest.json @@ -0,0 +1,9 @@ +{ + "mlops": { + 
"model_id": "", + "deployment_id": "", + "datarobot_api_endpoint": "https://app.datarobot.com" + }, + "required_models": [], + "optional_models": [] +} \ No newline at end of file From 435e4fe5f6e2c3d6c57f158168d59de256f3e932 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Sun, 12 Jun 2022 01:24:11 -0300 Subject: [PATCH 2/7] functional, feature filled commit --- adk/ADK.py | 33 +++++++--------- adk/mlops.py | 46 +++++++++++++---------- adk/modeldata.py | 18 +-------- tests/manifests/mlops_model_manifest.json | 9 ----- 4 files changed, 41 insertions(+), 65 deletions(-) delete mode 100644 tests/manifests/mlops_model_manifest.json diff --git a/adk/ADK.py b/adk/ADK.py index 5994767..a3edd54 100644 --- a/adk/ADK.py +++ b/adk/ADK.py @@ -9,6 +9,7 @@ from adk.io import create_exception, format_data, format_response from adk.modeldata import ModelData +from adk.mlops import MLOps class ADK(object): @@ -21,6 +22,7 @@ def __init__(self, apply_func, load_func=None, client=None): :param client: A Algorithmia Client instance that might be user defined, and is used for interacting with a model manifest file; if defined. 
""" + self.mlops = None self.FIFO_PATH = "/tmp/algoout" if client: @@ -43,10 +45,8 @@ def __init__(self, apply_func, load_func=None, client=None): self.load_result = None self.loading_exception = None self.manifest_path = "model_manifest.json" - self.model_data = self.init_manifest(self.manifest_path) - - def init_manifest(self, path): - return ModelData(self.client, path) + self.mlops_path = "mlops.json" + self.model_data = ModelData(self.client, self.manifest_path) def load(self): try: @@ -95,25 +95,18 @@ def write_to_pipe(self, payload, pprint=print): def process_local(self, local_payload, pprint): result = self.apply(local_payload) self.write_to_pipe(result, pprint=pprint) - - def mlops_initialize(self): - os.environ["MLOPS_SPOOLER_TYPE"] = "FILESYSTEM" - os.environ["MLOPS_FILESYSTEM_DIRECTORY"] = self.mlops_spool_dir - with open(f'{agents_dir}/conf/mlops.agent.conf.yaml') as f: - documents = yaml.load(f, Loader=yaml.FullLoader) - documents['mlopsUrl'] = DATAROBOT_ENDPOINT - documents['apiToken'] = DATAROBOT_API_TOKEN - with open(f'{agents_dir}/conf/mlops.agent.conf.yaml', 'w') as f: - yaml.dump(documents, f) - subprocess.call(f'{agents_dir}/bin/start-agent.sh') - check = subprocess.Popen([f'{agents_dir}/bin/status-agent.sh'], stdout=subprocess.PIPE) - check.terminate() - - + + def mlops_init(self): + mlops_token = os.environ.get("DATAROBOT_MLOPS_API_TOKEN", None) + if mlops_token: + self.mlops = MLOps(mlops_token, self.mlops_path) + self.mlops.init() + else: + raise Exception("'DATAROBOT_MLOPS_API_TOKEN' was not found, please set to use mlops.") def init(self, local_payload=None, pprint=print, mlops=False): if mlops and not self.is_local: - self.mlops_initialize() + self.mlops_init() self.load() if self.is_local and local_payload is not None: if self.loading_exception: diff --git a/adk/mlops.py b/adk/mlops.py index 6ac1dc9..8b4fe19 100644 --- a/adk/mlops.py +++ b/adk/mlops.py @@ -1,36 +1,44 @@ import yaml +import json import os import subprocess -class 
MLOps(Object): - def __init__(self, endpoint, api_token, model_id, deployment_id): +class MLOps(object): + spool_dir = "/tmp/ta" + agent_dir = "/opt/mlops-agent/datarobot_mlops_package-8.1.2" + + def __init__(self, api_token, path): self.token = api_token - self.endpoint = endpoint - self.model_id = model_id - self.deployment_id = deployment_id - self.spool_dir = "/tmp/ta" - self.agent_dir = "/opt/mlops-agent/datarobot_mlops_package-8.1.2" + if os.path.exists(path): + with open(path) as f: + mlops_config = json.load(f) + else: + raise Exception("'mlops.json' file does not exist, but mlops was requested.") + if not os.path.exists(agent_dir): + raise Exception("environment is not configured for mlops.\nPlease select a valid mlops enabled environment.") + self.endpoint = mlops_config['datarobot_api_endpoint'] + self.model_id = mlops_config['model_id'] + self.deployment_id = mlops_config['deployment_id'] def init(self): + os.environ['MLOPS_DEPLOYMENT_ID'] = self.deployment_id + os.environ['MLOPS_MODEL_ID'] = self.model_id + os.environ['MLOPS_SPOOLER_TYPE'] = "FILESYSTEM" + os.environ['MLOPS_FILESYSTEM_DIRECTORY'] = "/tmp/ta" + with open(f'{self.agent_dir}/conf/mlops.agent.conf.yaml') as f: documents = yaml.load(f, Loader=yaml.FullLoader) documents['mlopsUrl'] = self.endpoint documents['apiToken'] = self.token - with open(f'{agents_dir}/conf/mlops.agent.conf.yaml', 'w') as f: + with open(f'{self.agent_dir}/conf/mlops.agent.conf.yaml', 'w') as f: yaml.dump(documents, f) - subprocess.call(f'{agents_dir}/bin/start-agent.sh') - check = subprocess.Popen([f'{agents_dir}/bin/status-agent.sh'], stdout=subprocess.PIPE) - output = check.stdout.readlines() + subprocess.call(f'{self.agent_dir}/bin/start-agent.sh') + check = subprocess.Popen([f'{self.agent_dir}/bin/status-agent.sh'], stdout=subprocess.PIPE) + output = check.stdout.readlines()[0] check.terminate() - if "DataRobot MLOps-Agent is running as a service." in output: + if b"DataRobot MLOps-Agent is running as a service." 
in output: return True else: - return False - - def env_vars(self): - os.environ['MLOPS_DEPLOYMENT_ID'] = self.deployment_id - os.environ['MLOPS_MODEL_ID'] = self.model_id - os.environ['MLOPS_SPOOLER_TYPE'] = "FILESYSTEM" - os.environ['MLOPS_FILESYSTEM_DIRECTORY'] = "/tmp/ta" \ No newline at end of file + raise Exception(output) \ No newline at end of file diff --git a/adk/modeldata.py b/adk/modeldata.py index 6c832e2..e2b49cd 100644 --- a/adk/modeldata.py +++ b/adk/modeldata.py @@ -2,11 +2,10 @@ import json import hashlib from adk.classes import FileData -from adk.mlops import MLOps class ModelData(object): - def __init__(self, client, model_manifest_path, mlops=False): + def __init__(self, client, model_manifest_path): self.manifest_reg_path = model_manifest_path self.manifest_frozen_path = "{}.freeze".format(self.manifest_reg_path) self.manifest_data = self.get_manifest() @@ -14,7 +13,6 @@ def __init__(self, client, model_manifest_path, mlops=False): self.models = {} self.usr_key = "__user__" self.using_frozen = True - self.use_mlops = mlops def __getitem__(self, key): return getattr(self, self.usr_key + key) @@ -40,8 +38,6 @@ def available(self): def initialize(self): if self.client is None: raise Exception("Client was not defined, please define a Client when using Model Manifests.") - if self.use_mlops: - self.mlops_init() for required_file in self.manifest_data['required_files']: name = required_file['name'] source_uri = required_file['source_uri'] @@ -110,18 +106,6 @@ def get_manifest(self): else: return None - def mlops_init(self): - mlops = self.manifest_data['mlops'] - model_id = mlops['model_id'] - deployment_id = mlops['deployment_id'] - datarobot_api_endpoint = mlops['datarobot_api_endpoint'] - - api_token = os.environ.get('DATAROBOT_MLOPS_API_TOKEN') - if api_token is None: - raise Exception("'DATAROBOT_MLOPS_API_TOKEN' environment variable not found.\nPlease ensure that you have a" - "valid API token and add it as a secret to this algorithm.") - 
self.mlops = MLOps(datarobot_api_endpoint, api_token, model_id, deployment_id) - def check_lock(manifest_data): expected_lock_checksum = manifest_data.get('lock_checksum') diff --git a/tests/manifests/mlops_model_manifest.json b/tests/manifests/mlops_model_manifest.json deleted file mode 100644 index 0d85f61..0000000 --- a/tests/manifests/mlops_model_manifest.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "mlops": { - "model_id": "", - "deployment_id": "", - "datarobot_api_endpoint": "https://app.datarobot.com" - }, - "required_models": [], - "optional_models": [] -} \ No newline at end of file From e0ec40e949073ced1e436a458e5b6fa25b5322c0 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Sun, 12 Jun 2022 01:59:10 -0300 Subject: [PATCH 3/7] added yaml as dependency --- adk/ADK.py | 1 - requirements.txt | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/adk/ADK.py b/adk/ADK.py index a3edd54..718ef98 100644 --- a/adk/ADK.py +++ b/adk/ADK.py @@ -3,7 +3,6 @@ import os import sys import Algorithmia -import yaml import os import subprocess diff --git a/requirements.txt b/requirements.txt index ccb528b..8ba7957 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ algorithmia>=1.7,<2 -six \ No newline at end of file +six +pyaml==21.10 \ No newline at end of file From b32a1be13f0938aa34fcc249d48166802b65e412 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Sun, 12 Jun 2022 02:01:52 -0300 Subject: [PATCH 4/7] ugh version mgmt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8ba7957..eda150c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ algorithmia>=1.7,<2 six -pyaml==21.10 \ No newline at end of file +pyaml>=21.10,<21.11 \ No newline at end of file From 485ef9f65f5959b732608b3587b6cf603bf71a20 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: 
Sun, 12 Jun 2022 02:03:25 -0300 Subject: [PATCH 5/7] fix test shim --- tests/AdkTest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/AdkTest.py b/tests/AdkTest.py index e6b4672..941d848 100644 --- a/tests/AdkTest.py +++ b/tests/AdkTest.py @@ -1,7 +1,7 @@ from adk import ADK - +from adk.modeldata import ModelData class ADKTest(ADK): def __init__(self, apply_func, load_func=None, client=None, manifest_path="model_manifest.json.freeze"): super(ADKTest, self).__init__(apply_func, load_func, client) - self.model_data = self.init_manifest(manifest_path) + self.model_data = ModelData(self.client, manifest_path) From c1d82009375317c25eb60e5472e1724119917d5f Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Wed, 15 Jun 2022 14:16:34 -0300 Subject: [PATCH 6/7] replaced wildcard with actual path, which can be overridden by the mlops.json file --- adk/mlops.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/adk/mlops.py b/adk/mlops.py index 8b4fe19..a64efbd 100644 --- a/adk/mlops.py +++ b/adk/mlops.py @@ -6,7 +6,8 @@ class MLOps(object): spool_dir = "/tmp/ta" - agent_dir = "/opt/mlops-agent/datarobot_mlops_package-8.1.2" + agent_dir = "/opt/mlops-agent" + mlops_dir_name = "datarobot_mlops_package-8.1.2" def __init__(self, api_token, path): self.token = api_token @@ -15,27 +16,28 @@ def __init__(self, api_token, path): mlops_config = json.load(f) else: raise Exception("'mlops.json' file does not exist, but mlops was requested.") - if not os.path.exists(agent_dir): + if not os.path.exists(self.agent_dir): raise Exception("environment is not configured for mlops.\nPlease select a valid mlops enabled environment.") - self.endpoint = mlops_config['datarobot_api_endpoint'] + self.endpoint = mlops_config['datarobot_mlops_service_url'] self.model_id = mlops_config['model_id'] self.deployment_id = mlops_config['deployment_id'] + self.mlops_name = mlops_config.get('mlops_dir_name', 
'datarobot_mlops_package-8.1.2') def init(self): os.environ['MLOPS_DEPLOYMENT_ID'] = self.deployment_id os.environ['MLOPS_MODEL_ID'] = self.model_id os.environ['MLOPS_SPOOLER_TYPE'] = "FILESYSTEM" - os.environ['MLOPS_FILESYSTEM_DIRECTORY'] = "/tmp/ta" + os.environ['MLOPS_FILESYSTEM_DIRECTORY'] = self.spool_dir - with open(f'{self.agent_dir}/conf/mlops.agent.conf.yaml') as f: + with open(f'{self.agent_dir}/{self.mlops_dir_name}/conf/mlops.agent.conf.yaml') as f: documents = yaml.load(f, Loader=yaml.FullLoader) documents['mlopsUrl'] = self.endpoint documents['apiToken'] = self.token - with open(f'{self.agent_dir}/conf/mlops.agent.conf.yaml', 'w') as f: + with open(f'{self.agent_dir}/{self.mlops_dir_name}/conf/mlops.agent.conf.yaml', 'w') as f: yaml.dump(documents, f) - subprocess.call(f'{self.agent_dir}/bin/start-agent.sh') - check = subprocess.Popen([f'{self.agent_dir}/bin/status-agent.sh'], stdout=subprocess.PIPE) + subprocess.call(f'{self.agent_dir}/{self.mlops_dir_name}/bin/start-agent.sh') + check = subprocess.Popen([f'{self.agent_dir}/{self.mlops_dir_name}/bin/status-agent.sh'], stdout=subprocess.PIPE) output = check.stdout.readlines()[0] check.terminate() if b"DataRobot MLOps-Agent is running as a service." in output: From a44964b611c85ed161e8c4482edf07d29a4e667c Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Wed, 15 Jun 2022 17:43:49 -0300 Subject: [PATCH 7/7] added pyaml to the dependencies --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index f470b4a..8d34878 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ author_email='support@algorithmia.com', packages=['adk'], install_requires=[ + 'pyaml>=21.10,<21.11', 'six', ], include_package_data=True,