Source code for viam.services.vision.vision

import abc
import sys
from typing import Final, List, Mapping, Optional

from viam.media.video import ViamImage
from viam.proto.common import PointCloudObject
from viam.proto.service.vision import Classification, Detection, GetPropertiesResponse
from viam.resource.types import RESOURCE_NAMESPACE_RDK, RESOURCE_TYPE_SERVICE, Subtype
from viam.utils import ValueTypes

from ..service_base import ServiceBase

if sys.version_info >= (3, 10):
    from typing import TypeAlias
else:
    from typing_extensions import TypeAlias


class CaptureAllResult:
    """
    CaptureAllResult represents the collection of things that you have requested from the
    CaptureAllFromCamera method. This is used most often for visualization purposes, since normally,
    returning the image on every call to a classifier/detector/etc would be costly and unnecessary.

    The default result for each field is None rather than the empty list to distinguish between
    "there was no request for the classifier/detector to return a result" vs.
    "the classifier/detector was requested, but there were no results".
    """

    def __init__(
        self,
        image: Optional[ViamImage] = None,
        classifications: Optional[List[Classification]] = None,
        detections: Optional[List[Detection]] = None,
        objects: Optional[List[PointCloudObject]] = None,
        extra: Optional[Mapping[str, ValueTypes]] = None,
    ) -> None:
        """
        Store the requested results exactly as given; no copying or validation is performed.

        Args:
            image (ViamImage|None): The image from the GetImage request of the camera, if it was requested.
            classifications (List[Classification]|None): The classifications from GetClassifications, if it was requested.
            detections (List[Detection]|None): The detections from GetDetections, if it was requested.
            objects (List[PointCloudObject]|None): The object point clouds from GetObjectPointClouds, if it was requested.
            extra (Mapping[str, ValueTypes]|None): A catch-all structure, usually for metadata, that a module writer
                might want to return. Defaults to None (it is not replaced by an empty mapping).
        """
        self.image = image
        self.classifications = classifications
        self.detections = detections
        self.objects = objects
        self.extra = extra


class Vision(ServiceBase):
    """
    Vision represents a Vision service.

    This acts as an abstract base class for any drivers representing specific
    vision implementations. This cannot be used on its own. If the ``__init__()`` function is
    overridden, it must call the ``super().__init__()`` function.

    For more information, see `Computer Vision service <https://docs.viam.com/dev/reference/apis/services/vision/>`_.
    """

    SUBTYPE: Final = Subtype(  # pyright: ignore [reportIncompatibleVariableOverride]
        RESOURCE_NAMESPACE_RDK, RESOURCE_TYPE_SERVICE, "vision"
    )

    Properties: "TypeAlias" = GetPropertiesResponse
    """
    Properties is a class that states what features are supported on the associated vision service.
    Currently, these are the following properties:

    - classifications_supported (bool): GetClassifications and GetClassificationsFromCamera are implemented.
    - detections_supported (bool): GetDetections and GetDetectionsFromCamera are implemented.
    - object_point_clouds_supported (bool): GetObjectPointClouds is implemented.
    """

    @abc.abstractmethod
    async def capture_all_from_camera(
        self,
        camera_name: str,
        return_image: bool = False,
        return_classifications: bool = False,
        return_detections: bool = False,
        return_object_point_clouds: bool = False,
        *,
        extra: Optional[Mapping[str, ValueTypes]] = None,
        timeout: Optional[float] = None,
    ) -> CaptureAllResult:
        """Get the next image, detections, classifications, and objects all together,
        given a camera name. Used for visualization.

        ::

            my_detector = VisionClient.from_robot(machine, "my_detector")

            # Get the captured data for a camera
            result = await my_detector.capture_all_from_camera(
                "my_camera",
                return_image=True,
                return_detections=True,
            )
            image = result.image
            detections = result.detections

        Args:
            camera_name (str): The name of the camera to use for detection
            return_image (bool): Ask the vision service to return the camera's latest image
            return_classifications (bool): Ask the vision service to return its latest classifications
            return_detections (bool): Ask the vision service to return its latest detections
            return_object_point_clouds (bool): Ask the vision service to return its latest 3D segmentations
            extra (Mapping[str, ValueTypes]|None): Optional implementation-specific options.
            timeout (float|None): Optional time limit for the call.

        Returns:
            vision.CaptureAllResult: A class that stores all potential returns from the vision service.
            It can return the image from the camera along with its associated detections, classifications,
            and objects, as well as any extra info the model may provide.

        For more information, see `Computer Vision service <https://docs.viam.com/dev/reference/apis/services/vision/#captureallfromcamera>`_.
        """
        ...

    @abc.abstractmethod
    async def get_detections_from_camera(
        self,
        camera_name: str,
        *,
        extra: Optional[Mapping[str, ValueTypes]] = None,
        timeout: Optional[float] = None,
    ) -> List[Detection]:
        """Get a list of detections in the next image given a camera and a detector

        ::

            my_detector = VisionClient.from_robot(machine, "my_detector")

            # Get detections for the next image from the specified camera
            detections = await my_detector.get_detections_from_camera("my_camera")

        Args:
            camera_name (str): The name of the camera to use for detection
            extra (Mapping[str, ValueTypes]|None): Optional implementation-specific options.
            timeout (float|None): Optional time limit for the call.

        Raises:
            ViamError: Raised if given an image without a specified width and height

        Returns:
            List[viam.proto.service.vision.Detection]: A list of 2D bounding boxes, their labels, and the
            confidence score of the labels, around the found objects in the next 2D image
            from the given camera, with the given detector applied to it.

        For more information, see `Computer Vision service <https://docs.viam.com/dev/reference/apis/services/vision/#getdetectionsfromcamera>`_.
        """
        ...

    @abc.abstractmethod
    async def get_detections(
        self,
        image: ViamImage,
        *,
        extra: Optional[Mapping[str, ValueTypes]] = None,
        timeout: Optional[float] = None,
    ) -> List[Detection]:
        """Get a list of detections in the given image using the specified detector

        ::

            my_camera = Camera.from_robot(machine, "my_camera")
            my_detector = VisionClient.from_robot(machine, "my_detector")

            # Get an image from the camera
            img = await my_camera.get_image()

            # Get detections for that image
            detections = await my_detector.get_detections(img)

        Args:
            image (ViamImage): The image to get detections for
            extra (Mapping[str, ValueTypes]|None): Optional implementation-specific options.
            timeout (float|None): Optional time limit for the call.

        Raises:
            ViamError: Raised if given an image without a specified width and height

        Returns:
            List[viam.proto.service.vision.Detection]: A list of 2D bounding boxes, their labels, and the
            confidence score of the labels, around the found objects in the given 2D image,
            with the given detector applied to it.

        For more information, see `Computer Vision service <https://docs.viam.com/dev/reference/apis/services/vision/#getdetections>`_.
        """
        ...

    @abc.abstractmethod
    async def get_classifications_from_camera(
        self,
        camera_name: str,
        count: int,
        *,
        extra: Optional[Mapping[str, ValueTypes]] = None,
        timeout: Optional[float] = None,
    ) -> List[Classification]:
        """Get a list of classifications in the next image given a camera and a classifier

        ::

            my_classifier = VisionClient.from_robot(machine, "my_classifier")

            # Get the 2 classifications with the highest confidence scores for the next image from the camera
            classifications = await my_classifier.get_classifications_from_camera(
                "my_camera", 2)

        Args:
            camera_name (str): The name of the camera to use for classification
            count (int): The number of classifications desired
            extra (Mapping[str, ValueTypes]|None): Optional implementation-specific options.
            timeout (float|None): Optional time limit for the call.

        Returns:
            List[viam.proto.service.vision.Classification]: The list of Classifications

        For more information, see `Computer Vision service <https://docs.viam.com/dev/reference/apis/services/vision/#getclassificationsfromcamera>`_.
        """
        ...

    @abc.abstractmethod
    async def get_classifications(
        self,
        image: ViamImage,
        count: int,
        *,
        extra: Optional[Mapping[str, ValueTypes]] = None,
        timeout: Optional[float] = None,
    ) -> List[Classification]:
        """Get a list of classifications in the given image using the specified classifier

        ::

            my_camera = Camera.from_robot(machine, "my_camera")
            my_classifier = VisionClient.from_robot(machine, "my_classifier")

            # Get an image from the camera
            img = await my_camera.get_image()

            # Get the 2 classifications with the highest confidence scores for the image
            classifications = await my_classifier.get_classifications(img, 2)

        Args:
            image (ViamImage): The image to get classifications for
            count (int): The number of classifications desired
            extra (Mapping[str, ValueTypes]|None): Optional implementation-specific options.
            timeout (float|None): Optional time limit for the call.

        Returns:
            List[viam.proto.service.vision.Classification]: The list of Classifications

        For more information, see `Computer Vision service <https://docs.viam.com/dev/reference/apis/services/vision/#getclassifications>`_.
        """
        ...

    @abc.abstractmethod
    async def get_object_point_clouds(
        self,
        camera_name: str,
        *,
        extra: Optional[Mapping[str, ValueTypes]] = None,
        timeout: Optional[float] = None,
    ) -> List[PointCloudObject]:
        """
        Returns a list of the 3D point cloud objects and associated metadata in the latest
        picture obtained from the specified 3D camera (using the specified segmenter).

        To deserialize the returned information into a numpy array, use the Open3D library.

        ::

            import numpy as np
            import open3d as o3d

            my_segmenter = VisionClient.from_robot(machine, "my_segmenter")
            # Get the objects from the camera output
            objects = await my_segmenter.get_object_point_clouds("my_camera")
            # write the first object point cloud into a temporary file
            with open("/tmp/pointcloud_data.pcd", "wb") as f:
                f.write(objects[0].point_cloud)
            pcd = o3d.io.read_point_cloud("/tmp/pointcloud_data.pcd")
            points = np.asarray(pcd.points)

        Args:
            camera_name (str): The name of the camera
            extra (Mapping[str, ValueTypes]|None): Optional implementation-specific options.
            timeout (float|None): Optional time limit for the call.

        Returns:
            List[viam.proto.common.PointCloudObject]: The pointcloud objects with metadata

        For more information, see `Computer Vision service <https://docs.viam.com/dev/reference/apis/services/vision/#getobjectpointclouds>`_.
        """
        ...

    @abc.abstractmethod
    async def get_properties(
        self,
        *,
        extra: Optional[Mapping[str, ValueTypes]] = None,
        timeout: Optional[float] = None,
    ) -> Properties:
        """
        Get info about what vision methods the vision service provides. Currently returns boolean values that
        state whether the service implements the classification, detection, and/or 3D object segmentation methods.

        ::

            my_detector = VisionClient.from_robot(machine, "my_detector")
            properties = await my_detector.get_properties()
            detections_supported = properties.detections_supported
            classifications_supported = properties.classifications_supported

        Args:
            extra (Mapping[str, ValueTypes]|None): Optional implementation-specific options.
            timeout (float|None): Optional time limit for the call.

        Returns:
            Properties: The properties of the vision service

        For more information, see `Computer Vision service <https://docs.viam.com/dev/reference/apis/services/vision/#getproperties>`_.
        """
        ...