Source code for smqtk.web.descriptor_service

import mimetypes
import multiprocessing
import os

import flask
import requests

from smqtk.algorithms import DescriptorGenerator
from smqtk.representation import DescriptorElementFactory
from smqtk.representation.data_element.file_element import DataFileElement
from smqtk.representation.data_element.memory_element import DataMemoryElement
from smqtk.representation.data_element.url_element import DataUrlElement
from smqtk.utils import SimpleTimer
from smqtk.utils.configuration import (
    from_config_dict,
    make_default_config,
)
from smqtk.utils.dict import merge_dict
from smqtk.web import SmqtkWebApp


MIMETYPES = mimetypes.MimeTypes()


SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))


[docs]class DescriptorServiceServer (SmqtkWebApp): """ Simple server that takes in a specification of the following form: /<descriptor_type>/<uri>[?...] See the docstring for the ``DescriptorServiceServer.compute_descriptor()`` method for complete rules on how to form a calling URL. Computes the requested descriptor for the given file and returns that via a JSON structure. Standard return JSON:: { "success": <bool>, "descriptor": [ <float>, ... ] "message": <string>, "reference_uri": <uri> } Additional Configuration .. note:: We will look for an environment variable `DescriptorService_CONFIG` for a string file path to an additional JSON configuration file to consider. """
[docs] @classmethod def is_usable(cls): return True
[docs] @classmethod def get_default_config(cls): """ Generate and return a default configuration dictionary for this class. This will be primarily used for generating what the configuration dictionary would look like for this class without instantiating it. :return: Default configuration dictionary for the class. :rtype: dict """ c = super(DescriptorServiceServer, cls).get_default_config() merge_dict(c, { "descriptor_factory": DescriptorElementFactory.get_default_config(), "descriptor_generators": { "example": make_default_config(DescriptorGenerator.get_impls()) } }) return c
def __init__(self, json_config): """ Initialize application based of supplied JSON configuration :param json_config: JSON configuration dictionary :type json_config: dict """ super(DescriptorServiceServer, self).__init__(json_config) # Descriptor factory setup self._log.info("Initializing DescriptorElementFactory") self.descr_elem_factory = DescriptorElementFactory.from_config( self.json_config['descriptor_factory'] ) # Descriptor generator configuration labels #: :type: dict[str, dict] self.generator_label_configs = self.json_config['descriptor_generators'] # Cache of DescriptorGenerator instances so we don't have to # continuously initialize them as we get requests. self.descriptor_cache = {} self.descriptor_cache_lock = multiprocessing.RLock() @self.route("/") def list_ingest_labels(): return flask.jsonify({ "labels": sorted(self.generator_label_configs) }) @self.route("/all/content_types") def all_content_types(): """ Of available descriptors, what content types are processable, and what types are associated to which available descriptor generator. """ all_types = set() # Mapping of configuration label to content types that generator # can handle r = {} for l in self.generator_label_configs: d = self.get_descriptor_inst(l) all_types.update(d.valid_content_types()) r[l] = sorted(d.valid_content_types()) return flask.jsonify({ "all": sorted(all_types), "by-label": r }) @self.route("/all/compute/<path:uri>") def all_compute(uri): """ Compute descriptors over the specified content for all generators that function over the data's content type. JSON Return format:: { "success": <bool> "content_type": <str> "message": <str> "descriptors": { "<label>": <list[float]>, ... } | None "reference_uri": <str> } """ message = "execution nominal" data_elem = None try: data_elem = self.resolve_data_element(uri) except ValueError as ex: message = "Failed URI resolution: %s" % str(ex) descriptors = {} finished_loop = False if data_elem: for l in self.generator_label_configs: if data_elem.content_type() in \ self.get_descriptor_inst(l).valid_content_types(): d = None try: d = self.generate_descriptor(data_elem, l) except RuntimeError as ex: message = "Descriptor extraction failure: %s" \ % str(ex) except ValueError as ex: message = "Data content type issue: %s" % str(ex) descriptors[l] = d and d.vector().tolist() if not descriptors: message = "No descriptors can handle URI content type: %s" \ % data_elem.content_type else: finished_loop = True return flask.jsonify({ "success": finished_loop, "content_type": data_elem.content_type(), "message": message, "descriptors": descriptors, "reference_uri": uri }) @self.route("/<string:descriptor_label>/<path:uri>") def compute_descriptor(descriptor_label, uri): """ Data modes for upload/use:: - local filepath - base64 - http/s URL The following sub-sections detail how different URI's can be used. Local Filepath -------------- The URI string must be prefixed with ``file://``, followed by the full path to the data file to describe. Base 64 data ------------ The URI string must be prefixed with "base64://", followed by the base64 encoded string. This mode also requires an additional ``?content_type=`` to provide data content type information. This mode saves the encoded data to temporary file for processing. HTTP/S address -------------- This is the default mode when the URI prefix is none of the above. This uses the requests module to locally download a data file for processing. JSON Return format:: { "success": <bool> "message": <str> "descriptor": <None|list[float]> "reference_uri": <str> } :type descriptor_label: str :type uri: str """ message = "execution nominal" descriptor = None de = None try: de = self.resolve_data_element(uri) except ValueError as ex: message = "URI resolution issue: %s" % str(ex) if de: try: descriptor = self.generate_descriptor(de, descriptor_label) except RuntimeError as ex: message = "Descriptor extraction failure: %s" % str(ex) except ValueError as ex: message = "Data content type issue: %s" % str(ex) return flask.jsonify({ "success": descriptor is not None, "message": message, "descriptor": (descriptor is not None and descriptor.vector().tolist()) or None, "reference_uri": uri })
[docs] def get_config(self): return self.json_config
[docs] def get_descriptor_inst(self, label): """ Get the cached content descriptor instance for a configuration label :type label: str :rtype: smqtk.algorithms.descriptor_generator.DescriptorGenerator """ with self.descriptor_cache_lock: if label not in self.descriptor_cache: self._log.debug("Caching descriptor '%s'", label) self.descriptor_cache[label] = \ from_config_dict( self.generator_label_configs[label], DescriptorGenerator.get_impls() ) return self.descriptor_cache[label]
[docs] def resolve_data_element(self, uri): """ Given the URI to some data, resolve it down to a DataElement instance. :raises ValueError: Issue with the given URI regarding either URI source resolution or data resolution. :param uri: URI to data :type uri: str :return: DataElement instance wrapping given URI to data. :rtype: smqtk.representation.DataElement """ # Resolve URI into appropriate DataElement instance if uri[:7] == "file://": self._log.debug("Given local disk filepath") filepath = uri[7:] if not os.path.isfile(filepath): raise ValueError("File URI did not point to an existing file " "on disk.") else: de = DataFileElement(filepath) elif uri[:9] == "base64://": self._log.debug("Given base64 string") content_type = flask.request.args.get('content_type', None) self._log.debug("Content type: %s", content_type) if not content_type: raise ValueError("No content-type with given base64 data") else: b64str = uri[9:] de = DataMemoryElement.from_base64(b64str, content_type) else: self._log.debug("Given URL") try: de = DataUrlElement(uri) except requests.HTTPError as ex: raise ValueError("Failed to initialize URL element due to " "HTTPError: %s" % str(ex)) return de
[docs] def generate_descriptor(self, de, cd_label): """ Generate a descriptor for the content pointed to by the given URI using the specified descriptor generator. :raises ValueError: Content type mismatch given the descriptor generator :raises RuntimeError: Descriptor extraction failure. :type de: smqtk.representation.DataElement :type cd_label: str :return: Generated descriptor element instance with vector information. :rtype: smqtk.representation.DescriptorElement """ with SimpleTimer("Computing descriptor...", self._log.debug): cd = self.get_descriptor_inst(cd_label) descriptor = cd.generate_one_element( de, descr_factory=self.descr_elem_factory ) return descriptor
APPLICATION_CLASS = DescriptorServiceServer