diff --git a/README.md b/README.md index 5caa72d..7855447 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,47 @@ -# Anki Vector - Python SDK +# Amitabha's Anki Vector - Python SDK + +## Amitabha's modifications to the official Anki Vector SDK + +Current (v 0.7.0.hiamitabha): +- Ability to detect and view another Vector robot with the help of the viewer + +Future/Planned: +- Raising an event when a Vector robot appears, disappears, or is first observed. +- Ability to track and distinguish multiple Vector robots by the color of their eyes + +## Notes on usage +This SDK uses an ML inference API to detect other Vector robots. The API service is +provided by Roboflow. You have two ways to use this: + +1) You could use the Roboflow API with my account credentials. In that case you would have to +fill in the parameters specified in ml/config.json with those provided by me. To get these +parameters, you would need to message me via contacts listed on my GitHub profile: +https://github.com/hiamitabha Please adhere to the restrictions described in my email, as any +abuse of the services provided by my account affects everybody using the service. + +2) You could fork my public dataset at https://public.roboflow.com/object-detection/robot +After forking, you can generate your own dataset by choosing various preprocessing and augmentation +options that Roboflow provides. Thereafter, you would have to train your model at Roboflow, so that +they can provide you with an API to do inference. You can then pick up the dataset name, model version, +and Roboflow key from the curl URL and specify them in ml/config.json. Complete details on how to +train a model at Roboflow are available at https://docs.roboflow.com/train + +## Execution +To get a feel for the power of this SDK, please try the following program: + +python3 examples/tutorials/19_show_video_feed.py + +Here is an example output +[video](https://youtu.be/Nw9a50zGnvs) +from this program. 
+ +## Feedback +I have invested considerable time and effort building this SDK, and any feedback that you can provide +would be very helpful. Please reach me via the contacts on my GitHub profile: https://github.com/hiamitabha +with any feedback. + + +# Generic Notes from standard version of Vector SDK (v0.6) ![Vector](docs/source/images/vector-sdk-alpha.jpg) diff --git a/anki_vector/connection.py b/anki_vector/connection.py index 472b0d8..27aebda 100644 --- a/anki_vector/connection.py +++ b/anki_vector/connection.py @@ -74,9 +74,9 @@ class _ControlEventManager: """ def __init__(self, loop: asyncio.BaseEventLoop = None, priority: ControlPriorityLevel = None): - self._granted_event = asyncio.Event(loop=loop) - self._lost_event = asyncio.Event(loop=loop) - self._request_event = asyncio.Event(loop=loop) + self._granted_event = asyncio.Event() + self._lost_event = asyncio.Event() + self._request_event = asyncio.Event() self._has_control = False self._priority = priority self._is_shutdown = False diff --git a/anki_vector/events.py b/anki_vector/events.py index 259bc28..6cbb5f6 100644 --- a/anki_vector/events.py +++ b/anki_vector/events.py @@ -130,7 +130,7 @@ def _run_thread(self): try: self._loop = asyncio.new_event_loop() asyncio.set_event_loop(self._loop) - self._done_signal = asyncio.Event(loop=self._loop) + self._done_signal = asyncio.Event() # create an event stream handler on the connection thread self.event_future = asyncio.run_coroutine_threadsafe(self._handle_event_stream(), self._conn.loop) diff --git a/anki_vector/ml/agent.py b/anki_vector/ml/agent.py new file mode 100755 index 0000000..6b45aba --- /dev/null +++ b/anki_vector/ml/agent.py @@ -0,0 +1,191 @@ +"""A machine learning agent that supports operations via roboflow.ai +""" +import requests +import io +import os +import json +from requests_toolbelt.multipart.encoder import MultipartEncoder + +_CONFIG_FILE = "config.json" +_MODEL_SWAP_ITERATIONS = 20 +_VERSION_COLOR_CODE = '#8e3c44' + +try: + from PIL import Image, 
ImageDraw, ImageFont +except ImportError: + raise SystemExit("Cannot import from PIL: Do `pip3 install --user Pillow` to install") + +class MLAgent: + """MLAgent drives all ML-driven operations with the help of the support provided by roboflow.ai + """ + def __init__(self): + current_dir = os.path.dirname(os.path.realpath(__file__)) + config_file = os.path.join(current_dir,_CONFIG_FILE) + with open(config_file) as json_file: + config = json.load(json_file) + self.dataset = config['dataset'] + self.modelUuid = config['modelUuid'].split(",") + self.roboflowKey = config['roboflowKey'] + self.uploadNewImages = config['uploadNewImages'] + self.type = config.get('type') + self.numModels = len(self.modelUuid) + self.currentModelId = 0 + self.modelUseCounter = 0 + + def upload_image(self, image: Image.Image, + imageName: str): + """Code to upload an image to Roboflow. + Code is borrowed from the example at: + https://docs.roboflow.com/adding-data/upload-api + :param image: Input image that needs to be uploaded + :param imageName: The name with which the image needs to be uploaded + """ + # Do nothing unless uploading new images is enabled in config.json. + if not self.uploadNewImages: + return + # Convert to JPEG Buffer + buffered = io.BytesIO() + image.save(buffered, quality=90, format="JPEG") + + # Build multipart form and post request + m = MultipartEncoder(fields={'file': ("imageToUpload", buffered.getvalue(), "image/jpeg")}) + + # Construct the URL + upload_url = "".join([ + "https://api.roboflow.com/dataset/", + self.dataset, + "/upload", + "?api_key=", + self.roboflowKey, + "&name=", + imageName, + "&split=train" + ]) + + # POST to the API + result = requests.post(upload_url, data=m, headers={ + "Content-Type": m.content_type + }) + + res = result.json() + success = res.get('success') + if not success: + print(res) + else: + print("Image %s uploaded successfully!" 
% imageName) + + def updateCurrentModelId(self): + """Updates the model Id depending on the usage to facilitate A/B testing + """ + self.modelUseCounter += 1 + if (self.modelUseCounter % _MODEL_SWAP_ITERATIONS == 0): + self.currentModelId +=1 + if (self.currentModelId == self.numModels): + self.currentModelId = 0 + + def run_inference_via_roboflow(self, image: Image.Image): + """Run inference via roboflow APIs. Returns the image with bounding boxes + :param image: The image to run inference on + :return: The same image, annotated in place with an outline around each + detected object and a label showing its class + """ + # Convert to JPEG Buffer + buffered = io.BytesIO() + image.save(buffered, quality=90, format="JPEG") + + # Build multipart form and post request + m = MultipartEncoder(fields={'file': ("imageToUpload", buffered.getvalue(), "image/jpeg")}) + self.updateCurrentModelId() + model = self.modelUuid[self.currentModelId] + # Construct the Roboflow URL to do Inference + if self.type == 'object-detection': + upload_url = "".join([ + "https://detect.roboflow.com/", + self.dataset, + "/", + model, + "?api_key=", + self.roboflowKey, + "&format=json" + ]) + elif self.type == 'instance-segmentation': + upload_url = "".join([ + "https://outline.roboflow.com/", + self.dataset, + "/", + model, + "?api_key=", + self.roboflowKey, + "&format=json" + ]) + + # POST request to the API + r = requests.post(upload_url, data=m, headers={ + "Content-Type": m.content_type + }) + preds = r.json() + detections = preds['predictions'] + + draw = ImageDraw.Draw(image) + font = ImageFont.load_default() + draw.text((10,10), "Running model %s version %s " % (self.dataset, model), + fill=_VERSION_COLOR_CODE) + + for box in detections: + color = "#4892EA" + x1 = box['x'] - box['width'] / 2 + y1 = box['y'] - box['height'] / 2 + if self.type == 'object-detection': + x2 = box['x'] + box['width'] / 2 + y2 = box['y'] + box['height'] / 2 + draw.rectangle([ + x1, y1, x2, y2 + ], 
outline=color, width=5) + elif self.type == 'instance-segmentation': + points = box.get('points') + start_x = points[0]['x'] + start_y = points[0]['y'] + for point in points[1:]: + next_x = point['x'] + next_y = point['y'] + draw.line([start_x, start_y, next_x, next_y], fill=color, width=5) + start_x = next_x + start_y = next_y + + #text = box['class'] + '_modelv_' + model + text = box['class'] + text_size = font.getbbox(text)[2:] if hasattr(font, 'getbbox') else font.getsize(text) + + #set button size + 10px margins + button_size = (int(text_size[0])+20, int(text_size[1])+20) + button_img = Image.new('RGBA', button_size, color) + # put text on button with 10px margins + button_draw = ImageDraw.Draw(button_img) + button_draw.text((10, 10), text, font=font, fill=(255,255,255,255)) + + # put button on source image at the detection's top-left corner (x1, y1) + image.paste(button_img, (int(x1), int(y1))) + return image + + def run_inference_via_custom_url(self, image: Image.Image): + """Run inference via a custom URL. + Future: Read the custom URL from a config file. + """ + buffered = io.BytesIO() + image.save(buffered, quality=90, format="JPEG") + url = 'http://localhost:5000/' + files = {'file': buffered.getvalue(), 'model_choice':'best_s'} + response = requests.post(url, files=files) + image = Image.open(io.BytesIO(response.content)) + return image + + def run_inference(self, image: Image.Image): + """Run inference on the provided image with the help of Roboflow + inference API. 
Returns an annotated Image in case inference detects + an object + :param image: The image to run inference on + :return: The image annotated with the outlines and class labels of the + objects detected + """ + image = self.run_inference_via_roboflow(image) + return image diff --git a/anki_vector/ml/config.json b/anki_vector/ml/config.json new file mode 100644 index 0000000..96568a6 --- /dev/null +++ b/anki_vector/ml/config.json @@ -0,0 +1,7 @@ +{ + "dataset": "", + "modelUuid": "", + "roboflowKey": "", + "uploadNewImages": false, + "type": "instance-segmentation" +} diff --git a/anki_vector/version.py b/anki_vector/version.py index 252ecf8..2d0f1a2 100644 --- a/anki_vector/version.py +++ b/anki_vector/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.6.1.dev0" +__version__ = "0.7.0.hiamitabha" diff --git a/anki_vector/viewer.py b/anki_vector/viewer.py index 56fa095..a7ee802 100644 --- a/anki_vector/viewer.py +++ b/anki_vector/viewer.py @@ -21,6 +21,10 @@ import multiprocessing as mp import sys import threading +import time +import random +import uuid +from datetime import datetime, timedelta try: from PIL import Image @@ -29,6 +33,7 @@ from . import util from .events import Events +from .ml.agent import MLAgent class ViewerComponent(util.Component): @@ -56,6 +61,9 @@ def __init__(self, robot): self._close_event: mp.Event = None self._frame_queue: mp.Queue = None self._process = None + self._mlAgent = MLAgent() + self._last_upload_timestamp = 0 + self._next_upload_timestamp = 0 def show(self, timeout: float = 10.0, force_on_top: bool = True) -> None: """Render a video stream using the images obtained from @@ -117,6 +125,15 @@ def close(self) -> None: self._process.terminate() self._process = None + def _get_next_upload_time_delta(self): + """ + Generate the timedelta after which the next image should be uploaded. 
+ Currently the timedelta is configured to be between 20 sec and 1 minute + """ + rand = random.randint(20, 60) + delta = timedelta(seconds=rand) + return delta + def enqueue_frame(self, image: Image.Image): """Sends a frame to the viewer's rendering process. Sending `None` to the viewer will cause it to gracefully shutdown. @@ -139,9 +156,19 @@ def enqueue_frame(self, image: Image.Image): :param image: A frame from Vector's camera. """ close_event = self._close_event + current_time = datetime.now() + if not self._last_upload_timestamp or current_time >= self._next_upload_timestamp: + if self._last_upload_timestamp and image is not None: + imageName = uuid.uuid4() + self._mlAgent.upload_image(image, 'robot' + str(imageName)) + self._last_upload_timestamp = current_time + self._next_upload_timestamp = \ + current_time + self._get_next_upload_time_delta() + + processed_image = self._mlAgent.run_inference(image) if image is not None else None if self._frame_queue is not None and close_event is not None and not close_event.is_set(): try: - self._frame_queue.put(image, False) + self._frame_queue.put(processed_image, False) except mp.queues.Full: pass diff --git a/examples/tutorials/19_show_video_feed.py b/examples/tutorials/19_show_video_feed.py new file mode 100644 index 0000000..885b792 --- /dev/null +++ b/examples/tutorials/19_show_video_feed.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2021 Amitabha Banerjee +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License in the file LICENSE.txt or at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +'''Display a GUI window with video feed from Vector +''' + +import time +import argparse + +import anki_vector +from anki_vector.util import degrees + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + args = anki_vector.util.parse_command_args(parser) + with anki_vector.Robot(serial=args.serial, show_viewer=True) as robot: + robot.behavior.set_head_angle(degrees(3.0)) + time.sleep(240) + robot.viewer.close() diff --git a/requirements.txt b/requirements.txt index 509c1f1..1023ea8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,6 @@ flask googleapis-common-protos numpy>=1.11 Pillow>=3.3 -requests \ No newline at end of file +requests +protobuf==3.20 +requests_toolbelt