"""
obspy.core.event.resourceid - ResourceIdentifier
================================================
This module defines the ResourceIdentifier class and associated code.
:copyright:
The ObsPy Development Team (devs@obspy.org)
:license:
GNU Lesser General Public License, Version 3
(https://www.gnu.org/copyleft/lesser.html)
"""
import re
import warnings
from contextlib import contextmanager
from copy import deepcopy
from uuid import uuid4
from weakref import WeakKeyDictionary, WeakValueDictionary
from obspy.core.util.decorator import deprecated
class _ResourceKey(object):
"""
A private semi-singleton class used to refer id strings to objects.
Only one instance should be created for each unique string/int, unless
all reference are garbage collect, in which case a new instance will be
created. Constructing instances through the get_resource_key class method
is required to ensure this behavior.
This class allows the python gc to handle cleanup of the
ResourceIdentifier stored class state rather than manual reference
counting as was implemented before.
"""
# define a mapping from a resource string to a singleton instance
_singleton_cache = WeakValueDictionary()
def __deepcopy__(self, memodict={}):
memodict[id(self)] = self
return self
def __copy__(self):
return self
@classmethod
def get_resource_key(cls, unique_id):
if unique_id not in _ResourceKey._singleton_cache:
single = _ResourceKey()
_ResourceKey._singleton_cache[unique_id] = single
return _ResourceKey._singleton_cache[unique_id]
class _ResourceKeyDescriptor(object):
    """
    A private descriptor for initializing _ResourceKey instances.

    Values assigned through the descriptor are converted to the matching
    :class:`_ResourceKey` singleton, which is stored on the instance under
    the descriptor name with ``'__'`` appended.

    :param name: Base name of the attribute; the key is stored on the
        instance as ``name + '__'``.
    :param default: Value returned while nothing has been stored.
    """
    def __init__(self, name, default=None):
        self.name = name + '__'
        self.default = default

    def __get__(self, instance, owner):
        # Also covers access on the class itself (instance is None), which
        # yields the default.
        return getattr(instance, self.name, self.default)

    def __set__(self, instance, value):
        """
        Store the _ResourceKey singleton belonging to ``value``.

        * ``None`` is ignored, any previously stored key is kept.
        * A :class:`_ResourceKey` instance is stored as is.
        * ints and strings are used directly to look up the singleton.
        * For any other object, ``id(value)`` is used for the lookup.
        """
        if value is None:
            return
        if isinstance(value, _ResourceKey):
            # Already a key: reuse it. Running it through id() like an
            # arbitrary object would produce an unrelated key derived from
            # the memory address of the key object.
            key = value
        else:
            # if an object was passed, use the id of the object for hash
            if not isinstance(value, (int, str)):
                value = id(value)
            key = _ResourceKey.get_resource_key(value)
        setattr(instance, self.name, key)
[docs]
class ResourceIdentifier(object):
r"""
Unique identifier referring to a resource.
In QuakeML many elements and types can have a unique id that other elements
use to refer to it. This is called a ResourceIdentifier and it is used for
the same purpose in the obspy.core.event classes. The id must be a string.
In QuakeML it has to be of the following regex form::
(smi|quakeml):[\w\d][\w\d\-\.\*\(\)_~']{2,}/[\w\d\-\.\*\(\)_~']
[\w\d\-\.\*\(\)\+\?_~'=,;#/&]*
e.g.
* ``smi:sub.website.org/event/12345678``
* ``quakeml:google.org/pick/unique_pick_id``
smi stands for "seismological meta-information".
:type id: str, optional
:param id: A string to uniquely identify a resource. If no resource_id
is given, uuid.uuid4() will be used to create one which assures
uniqueness within the current Python run. If no fixed id is provided,
the ID will be built from prefix and a random uuid hash. The random
hash can be regenerated by the referred object automatically if it
gets changed.
:type prefix: str, optional
:param prefix: An optional identifier that will be put in front of any
automatically created resource id. The prefix will only have an effect
if `id` is not specified (for a fixed ID string). Makes automatically
generated resource ids more reasonable. By default "smi:local" is used
which ensures a QuakeML compliant resource identifier.
:type referred_object: object, optional
:param referred_object: The object (resource) to which this instance
refers. All instances created with the same resource_id will be able
to access the object as long as at least one instance actually has a
reference to it. Additionally, ResourceIdentifier instances that have
the same id but refer to different objects will still return the
correct referred object, provided it doesn't get garbage collected.
If the referred object no longer exists the last object registered
with the same id will be returned, or None if no such object exists.
:type parent: hashable
:param parent:
The parent to use as a namespace for the resource identifier. This
allows potentially interconnected resource identifiers to be sensibly
grouped together, ensuring that objects belonging to the same parent
are returned by resource ids of the parent. Used primarily to ensure
resource_ids in an event refer to other objects in the same event.
All resource ids must be unique within the parent namespace.
.. rubric:: General Usage
>>> ResourceIdentifier('2012-04-11--385392')
ResourceIdentifier(id="2012-04-11--385392")
>>> # If 'id' is not specified it will be generated automatically.
>>> ResourceIdentifier() # doctest: +ELLIPSIS
ResourceIdentifier(id="smi:local/...")
>>> # Supplying a prefix will simply prefix the automatically generated ID
>>> ResourceIdentifier(prefix='event') # doctest: +ELLIPSIS
ResourceIdentifier(id="event/...")
ResourceIdentifiers can, and oftentimes should, carry a reference to the
object they refer to. This is a weak reference which means that if the
object gets deleted or runs out of scope, e.g. gets garbage collected, the
reference will cease to exist.
>>> from obspy.core.event import Event
>>> event = Event()
>>> import sys
>>> ref_count = sys.getrefcount(event)
>>> res_id = ResourceIdentifier(referred_object=event)
>>> # The reference does not change the reference count of the object.
>>> print(ref_count == sys.getrefcount(event))
True
>>> # It actually is the same object.
>>> print(event is res_id.get_referred_object())
True
>>> # Deleting it, or letting the garbage collector handle the object will
>>> # invalidate the reference.
>>> del event
>>> print(res_id.get_referred_object()) # doctest: +SKIP
None
The most powerful ability (and reason why one would want to use a resource
identifier class in the first place) is that once a ResourceIdentifier with
an attached referred object has been created, any other ResourceIdentifier
instances with the same ID can retrieve that object. This works
across all ResourceIdentifiers that have been instantiated within one
Python run.
This enables, e.g. the resource references between the different QuakeML
elements to work in a rather natural way.
>>> event_object = Event()
>>> obj_id = id(event_object)
>>> res_id = "obspy.org/event/test"
>>> ref_a = ResourceIdentifier(res_id)
>>> # The object it refers to cannot be found yet because no instance that
>>> # has an attached object has been created so far.
>>> print(ref_a.get_referred_object())
None
>>> # This instance has an attached object.
>>> ref_b = ResourceIdentifier(res_id, referred_object=event_object)
>>> ref_c = ResourceIdentifier(res_id)
>>> # All ResourceIdentifiers will refer to the same object.
>>> assert ref_a.get_referred_object() is event_object
>>> assert ref_b.get_referred_object() is event_object
>>> assert ref_c.get_referred_object() is event_object
Resource identifiers are bound to an object once the get_referred_object
method has been called. The result is that get_referred_object will
always return the same object it did on the first call as long as the
object still exists. If the bound object gets garbage collected a warning
will be issued and another object with the same resource_id will be
returned (if one exists). If no other object is associated with the same
resource_id, an additional warning will be issued and None returned.
>>> from obspy import UTCDateTime
>>> res_id = 'obspy.org/tests/test_resource_doc_example'
>>> obj_a = UTCDateTime(10)
>>> obj_b = UTCDateTime(10)
>>> ref_a = ResourceIdentifier(res_id, referred_object=obj_a)
>>> ref_b = ResourceIdentifier(res_id, referred_object=obj_b)
>>> assert ref_a.get_referred_object() == ref_b.get_referred_object()
>>> assert ref_a.get_referred_object() is not ref_b.get_referred_object()
>>> assert ref_a.get_referred_object() is obj_a
>>> assert ref_b.get_referred_object() is obj_b
>>> del obj_b # obj_b gets garbage collected
>>> assert ref_b.get_referred_object() is obj_a # doctest: +SKIP
>>> del obj_a # now no object with res_id exists
>>> assert ref_b.get_referred_object() is None # doctest: +SKIP
ResourceIdentifiers are considered identical if the IDs are
the same.
>>> # Create two different resource identifiers.
>>> res_id_1 = ResourceIdentifier()
>>> res_id_2 = ResourceIdentifier()
>>> res_id_3 = ResourceIdentifier(id=res_id_2.id)
>>> assert res_id_1 != res_id_2
>>> assert res_id_2 == res_id_3
ResourceIdentifier instances can be used as dictionary keys.
>>> dictionary = {}
>>> res_id = ResourceIdentifier(id="foo")
>>> dictionary[res_id] = "bar1"
>>> # The same ID can still be used as a key.
>>> dictionary["foo"] = "bar2"
>>> items = sorted(dictionary.items(), key=lambda kv: kv[1])
>>> for k, v in items: # doctest: +ELLIPSIS
... print(repr(k), v)
ResourceIdentifier(id="foo") bar1
...'foo' bar2
Because ResourceIdentifier instances are hashed based on their id
attribute, you should never change it once it has been set. Create a new
ResourceIdentifier object instead.
"""
# Class (not instance) attributes that keep track of all resource
# identifier instances throughout one Python run. The three mappings below
# are weak-key or weak-value dictionaries and therefore do not interfere
# with the garbage collection.
# DO NOT CHANGE THESE FROM OUTSIDE THE CLASS. Use the _debug_class_state
# method for temporarily altering these for testing or debugging.
# A dict for keeping track of parent/object specific references. Both
# levels are weakly keyed; the innermost values are the
# (object_id, resource id string) tuples returned by _object_key.
# {parent_key: {resource_key: object_key}}
_parent_id_tree = WeakKeyDictionary()
# Dict of lists to keep track of the order in which object keys are bound
# to a resource key; the most recently bound one is last.
# {resource_key: [object_key, object_key, ...]}
_id_order = WeakKeyDictionary()
# A weak value dict mapping object keys (not resource ids) to objects.
# {object_key: object}
_id_object_map = WeakValueDictionary()
# A list of get-object hooks that allows plugging in custom logic for
# finding objects by resource_id if they are not found via normal means.
_get_object_hook = []
# Set default _ResourceKey attributes and object_id.
_parent_key = _ResourceKeyDescriptor('_parent_key')
_resource_key = _ResourceKeyDescriptor('_resource_key')
_object_id = None
[docs]
def __init__(self, id=None, prefix="smi:local", referred_object=None,
             parent=None):
    """
    Set up the identifier; see the class docstring for the parameters.
    """
    # Copy construction: adopt the complete state of the other identifier.
    # None of the remaining arguments are looked at on this path.
    if isinstance(id, ResourceIdentifier):
        self.__dict__.update(id.__dict__)
        return
    if id is not None:
        # A fixed, user supplied id.
        self.fixed = True
        self.id = id
    else:
        # No id given: it is assembled from the prefix and a random uuid.
        self.fixed = False
        self._prefix = prefix
        self._uuid = str(uuid4())
    # Resolve the _ResourceKey singletons for the id and the parent.
    self._resource_key = self.id
    self._parent_key = parent
    # Bind the referred object, if there is one.
    if referred_object is None:
        return
    self.set_referred_object(referred_object)
[docs]
def get_referred_object(self):
    """
    Return the object this resource identifier refers to.

    The object bound to this instance is returned if it still exists.
    Otherwise another object registered under the same resource id is
    looked up and bound. If that fails too, the registered get_object
    hooks are called with the id string, in order, and the first result
    that is not None is bound and returned.

    :return: The referred object, or None if no object can be found.
    """
    object_map = ResourceIdentifier._id_object_map
    try:
        found = object_map[self._object_key]
    except KeyError:
        found = self._get_similar_referred_object()
    if found is not None:
        return found
    # Nothing registered in memory, give the hooks a chance.
    for hook in self._get_object_hook:
        found = hook(self.id)
        if found is None:
            continue
        self.set_referred_object(found)
        return found
    return None
[docs]
def _get_similar_referred_object(self):
"""
Find an object with the same resource id.
If the resource_identifier instance is not bound to a specific object,
or the bound object has been garbage collected, try to find another
object that was assigned the same resource_id.
The parent_id_tree will be scanned first to see if any object_ids
have been assigned to the same resource_id in the parent scope. If
not simply use the last object assigned the resource_id. If no object
is found return None.
"""
# Warn if resource_id was bound but its object got gc'ed.
if self._object_id is not None:
msg = ("The object with identity of: %d and id of %s no longer "
"exists, trying to find an object with the same id"
) % (self._object_id, self.id)
warnings.warn(msg, UserWarning)
# try to get the object_id from the parent_id_tree
obj = self._get_object_from_parent_scope()
if obj is None:
# else try to find the last object assigned the same resource_id
obj = self._get_newest_object_with_same_resource_id()
# if an object was found bind it to resource_id and return it
if obj is not None:
self.set_referred_object(obj, warn=False)
return obj
[docs]
def _get_newest_object_with_same_resource_id(self):
    """
    Return the newest object which has been bound to the same resource_id.

    The list of object keys recorded for this resource key is walked from
    the end. Entries whose object no longer exists, or whose recorded id
    string differs from the current id, are removed from the list along
    the way.

    :return: The newest matching object, or None if no such object exists.
    """
    try:
        bound_keys = ResourceIdentifier._id_order[self._resource_key]
    except KeyError:  # no existing object is assigned to this resource_id
        return None
    object_map = ResourceIdentifier._id_object_map
    while bound_keys:
        newest_key = bound_keys[-1]
        # None means the object got gc'ed or was never registered.
        obj = object_map.get(newest_key)
        # Ensure the current resource_id matches the recorded one, there
        # can, in rare cases, be a mismatch due to issue #2278.
        if obj is not None and newest_key[-1] == self.id:
            return obj
        bound_keys.pop()  # Drop the stale entry and try the next one.
    return None
[docs]
def _get_object_from_parent_scope(self):
    """
    Find an object with the same resource_id in the same parent scope.

    :return: The object registered for this resource key under this
        instance's parent key, provided it still exists and was
        registered with the current id string. None otherwise.
    """
    scopes = ResourceIdentifier._parent_id_tree
    try:
        obj_key = scopes[self._parent_key][self._resource_key]
    except (KeyError, TypeError):
        # Nothing is registered for this parent/resource combination, or
        # the lookup itself was not possible for the given keys.
        return None
    obj = ResourceIdentifier._id_object_map.get(obj_key)
    if obj is None:
        return None
    # Only accept the object if it was registered under the current id.
    if obj_key[-1] == self.id:
        return obj
    return None
[docs]
def set_referred_object(self, referred_object, warn=True, parent=None):
    """
    Bind an object to the ResourceIdentifier instance.

    Optionally scopes the binding to a parent. The object is registered
    in the class level object map and recorded as the newest object for
    the resource key.

    A warning is emitted if ``referred_object`` is not equal to the object
    previously bound to this instance or, if there is none, to the last
    object that was assigned the same resource id.

    :param referred_object: The object to which the resource id refers.
    :type referred_object: object
    :param warn:
        If True, issue a warning if the referred_object is not equal to
        the last object assigned the same resource_id.
    :type warn: bool
    :param parent:
        An object or int (id) to which the resource_id should be scoped.
        Used, for example, to ensure that all resource ids belonging to
        an event object are event-scoped.
    :type parent: object, int
    """
    object_map = ResourceIdentifier._id_object_map
    id_order = ResourceIdentifier._id_order
    # Determine the object to compare against. This has to happen before
    # the object id is replaced further down, since the object key is
    # derived from it.
    previous = object_map.get(self._object_key, None)
    if previous is None:  # Look for last object with same resource id.
        try:
            last_key = id_order[self._resource_key][-1]
            previous = object_map[last_key]
        except (KeyError, IndexError):
            pass
    if warn and previous is not None and previous != referred_object:
        msg = ('Warning, binding object to resource ID %s which '
               'is not equal to the last object bound to this '
               'resource_id') % self.id
        warnings.warn(msg, UserWarning)
    # From here on the object key refers to the new object.
    self._object_id = id(referred_object)
    scoped = parent is not None or self._parent_key is not None
    if scoped:
        if parent is not None:
            self._parent_key = parent
        scopes = ResourceIdentifier._parent_id_tree
        if self._parent_key not in scopes:
            scopes[self._parent_key] = WeakKeyDictionary()
        scopes[self._parent_key][self._resource_key] = self._object_key
    # Register the object and record it as the newest for this resource.
    object_map[self._object_key] = referred_object
    if self._resource_key not in id_order:
        id_order[self._resource_key] = []
    id_order[self._resource_key].append(self._object_key)
[docs]
@deprecated()
def convert_id_to_quakeml_uri(self, authority_id="local"):
    """
    Replace the current ID with a valid QuakeML URI, in place.

    This method is deprecated, use :meth:`get_quakeml_id` instead.

    The id is replaced by the result of :meth:`get_quakeml_uri_str`. An
    id that already is a valid QuakeML URI is therefore kept, any other id
    is converted to the form

        smi:authority_id/prefix/resource_id

    :type authority_id: str, optional
    :param authority_id: The base url of the resulting string. Defaults to
        ``"local"``.
    """
    quakeml_uri = self.get_quakeml_uri_str(authority_id=authority_id)
    self.id = quakeml_uri
[docs]
def get_quakeml_id(self, authority_id="local"):
    """
    Return a resource id with a valid QuakeML URI.

    The id string of the new instance is obtained from
    :meth:`get_quakeml_uri_str`: an id that already is a valid QuakeML URI
    is kept, any other id is converted to the form

        smi:authority_id/prefix/resource_id

    If this instance refers to an object, the new resource id is bound to
    the same object.

    :type authority_id: str, optional
    :param authority_id: The base url of the resulting string. Defaults to
        ``"local"``.
    :return: A new ResourceIdentifier instance with a valid quakeml uri.
    """
    uri = self.get_quakeml_uri_str(authority_id=authority_id)
    quakeml_rid = ResourceIdentifier(uri)
    target = self.get_referred_object()
    if target is None:
        return quakeml_rid
    # The parent key stored on this instance (possibly None) is handed on
    # as the parent of the new identifier.
    quakeml_rid.set_referred_object(target, warn=False,
                                    parent=self._parent_key)
    return quakeml_rid
[docs]
@deprecated()
def get_quakeml_uri(self, authority_id="local"):
    """
    Deprecated alias of :meth:`.get_quakeml_uri_str`.

    :type authority_id: str, optional
    :param authority_id: Passed on to :meth:`.get_quakeml_uri_str`.
    :return: Whatever :meth:`.get_quakeml_uri_str` returns.
    """
    uri = self.get_quakeml_uri_str(authority_id=authority_id)
    return uri
[docs]
def get_quakeml_uri_str(self, authority_id="local"):
    """
    Return the id as a string that is a valid QuakeML URI.

    An id that already is a valid QuakeML URI is returned unchanged. Any
    other id is prefixed with ``smi:<authority_id>/``. An empty (or
    whitespace only) id is replaced by a random uuid before prefixing.
    The resource id itself is not modified.

    :type authority_id: str, optional
    :param authority_id: The base url of the resulting string. Defaults to
        ``"local"``.
    :rtype: str
    :return: The id string in the form of a valid QuakeML URI.
    :raises ValueError: If the id is not valid even after prefixing.

    >>> res_id = ResourceIdentifier("some_id")
    >>> print(res_id.get_quakeml_uri_str())
    smi:local/some_id
    >>> # Did not change the actual resource id.
    >>> print(res_id.id)
    some_id
    """
    # The whole string has to match. A pattern anchored with "$" and used
    # with re.match would also accept an id ending in a newline.
    pattern = (r"(smi|quakeml):[\w\d][\w\d\-\.\*\(\)_~']{2,}/[\w\d\-\."
               r"\*\(\)_~'][\w\d\-\.\*\(\)\+\?_~'=,;#/&]*")
    current = self.id
    if str(current).strip() == "":
        current = str(uuid4())
    if re.fullmatch(pattern, str(current)) is not None:
        return current
    candidate = 'smi:%s/%s' % (authority_id, str(current))
    # Check once again just to be sure no weird symbols are stored in the
    # ID.
    if re.fullmatch(pattern, candidate) is None:
        msg = (
            "The id '%s' is not a valid QuakeML resource "
            "identifier. ObsPy tried modifying it to '%s' but it is still "
            "not valid. Please make sure all resource ids are either "
            "valid or can be made valid by prefixing them with "
            "'smi:<authority_id>/'. Valid ids are specified in the "
            "QuakeML manual section 3.1 and in particular exclude colons "
            "for the final part." % (self.id, candidate))
        raise ValueError(msg)
    return candidate
[docs]
def copy(self):
    """
    Return a deep copy of the ResourceIdentifier.

    The copy is a distinct instance that compares equal to the original.

    >>> res_id = ResourceIdentifier()
    >>> res_id_2 = res_id.copy()
    >>> print(res_id is res_id_2)
    False
    >>> print(res_id == res_id_2)
    True
    """
    duplicate = deepcopy(self)
    return duplicate
[docs]
def __deepcopy__(self, memodict=None):
    """
    Return a new ResourceIdentifier carrying over this instance's state.

    The instance dictionary is copied shallowly, the attribute values
    themselves are shared with the original.

    :param memodict: The memo dictionary passed by ``copy.deepcopy``. If
        given, the new instance is registered in it as the copy of this
        one.
    :return: The new ResourceIdentifier instance.
    """
    new = ResourceIdentifier.__new__(ResourceIdentifier)
    new.__dict__.update(self.__dict__)
    # Request a reset of the parent scope on the copy.
    # NOTE(review): the assignment goes through the _parent_key
    # descriptor; confirm that assigning None actually clears the key
    # that was just copied over.
    new._parent_key = None
    if memodict is not None:
        memodict[id(self)] = new
    return new
[docs]
def __setstate__(self, state):
"""
Make sure the resource_key follows the singleton pattern.
"""
self.__dict__ = state
self._parent_key = None
self._resource_key = _ResourceKey.get_resource_key(self.id)
@property
def _object_key(self):
"""
The value used to identify objects bound to resource_ids.
Uses a hash of both the object id and resource id, see #2278.
"""
return self._object_id, self.id
@property
def id(self):
    """
    Unique identifier of the current instance.

    For a fixed identifier this is the stored id string. Otherwise the id
    is assembled on every access from the prefix, a ``"/"`` separator
    (unless the prefix already ends with one) and the uuid.
    """
    if self.fixed:
        return self.__dict__.get("id")
    generated = self.prefix
    if not generated.endswith("/"):
        generated += "/"
    return generated + self.uuid

@id.deleter
def id(self):
    """
    Deleting the id is not possible, an Exception is always raised.
    """
    msg = "The resource id cannot be deleted."
    raise Exception(msg)

@id.setter
def id(self, value):
    """
    Set a fixed id string.

    A UserWarning is issued if a resource key has already been stored on
    the instance, i.e. if an existing id is being overwritten.

    :raises TypeError: If ``value`` is not a string. The instance is left
        unchanged in that case.
    """
    # Validate before touching any state. Marking the identifier as fixed
    # first would leave an automatically generated identifier with an id
    # of None after a rejected assignment.
    if not isinstance(value, str):
        msg = "attribute id needs to be a string."
        raise TypeError(msg)
    if '_resource_key__' in self.__dict__:
        msg = ('overwritting the id attribute of a ResourceIdentifier '
               'object is very dangerous and will raise an exception in '
               'a future version of obspy')
        warnings.warn(msg, UserWarning)
    self.fixed = True
    self.__dict__["id"] = value
@property
def prefix(self):
    """
    The stored prefix, used by the id of non-fixed identifiers.
    """
    return self._prefix

@prefix.deleter
def prefix(self):
    """
    Deleting the prefix resets it to an empty string.
    """
    self._prefix = ""

@prefix.setter
def prefix(self, value):
    """
    Set the prefix.

    :raises TypeError: If ``value`` is not a string.
    """
    if isinstance(value, str):
        self._prefix = value
        return
    msg = "prefix id needs to be a string."
    raise TypeError(msg)
@property
def uuid(self):
    """
    The stored uuid string, used by the id of non-fixed identifiers.
    """
    return self._uuid

@uuid.deleter
def uuid(self):
    """
    Deleting the uuid is forbidden, an Exception is always raised.
    """
    raise Exception("The uuid cannot be deleted.")

@uuid.setter
def uuid(self, value):  # @UnusedVariable
    """
    Setting the uuid is forbidden, an Exception is always raised.
    """
    raise Exception("The uuid cannot be set manually.")
@property
def resource_id(self):
    """
    Alias of the ``id`` attribute; reading returns ``self.id``.
    """
    current = self.id
    return current

@resource_id.deleter
def resource_id(self):
    """
    Alias of the ``id`` attribute; deleting is forwarded to ``id``.
    """
    del self.id

@resource_id.setter
def resource_id(self, value):
    """
    Alias of the ``id`` attribute; assigning is forwarded to ``id``.
    """
    self.id = value
[docs]
def __str__(self):
return self.id
[docs]
def _repr_pretty_(self, p, cycle):
p.text(str(self))
[docs]
def __repr__(self):
return 'ResourceIdentifier(id="%s")' % self.id
[docs]
def __eq__(self, other):
    """
    Compare with an id value or with another ResourceIdentifier.

    :return: True if ``other`` compares equal to the id of this instance,
        or if it is a ResourceIdentifier with an equal id. False
        otherwise.
    """
    # Direct comparison first, this covers plain id strings.
    if self.id == other:
        return True
    if isinstance(other, ResourceIdentifier):
        return True if self.id == other.id else False
    return False
[docs]
def __ne__(self, other):
return not self.__eq__(other)
[docs]
def __hash__(self):
"""
Uses the same hash as the resource id. This means that class instances
can be used in dictionaries and other hashed types.
Both the object and it's id can still be independently used as
dictionary keys.
"""
# "Salt" the hash with a string so the hash of the object and a
# string identical to the id can both be used as individual
# dictionary keys.
return hash("RESOURCE_ID") + self.id.__hash__()
# Make instances callable: calling a resource id instance is an alias for
# its get_referred_object method. Like calling a weakref.ref instance,
# this returns the referred object, if in scope, else None.
__call__ = get_referred_object
[docs]
@deprecated()
def regenerate_uuid(self):
    """
    Replace the stored uuid with a newly generated random one.

    The id of a resource identifier with a user-set, fixed id does not
    make use of the uuid and is therefore not affected.
    """
    fresh = uuid4()
    self._uuid = str(fresh)
[docs]
@classmethod
def register_get_object_hook(cls, get_object_callable):
    """
    Register a callable to return referred object if normal means fail.

    The callable is appended to the list of get_object hooks. This is
    useful, for example, to allow the resource_identifier to create
    objects from entries in a database that do not yet have an in-memory
    representation.

    :param get_object_callable:
        Any callable that takes a resource id string and returns an
        object.
    :type get_object_callable: callable
    """
    hooks = cls._get_object_hook
    hooks.append(get_object_callable)
[docs]
@classmethod
def remove_get_object_hook(cls, get_object_callable=None):
    """
    Remove a callable from the registered get_object hooks.

    :param get_object_callable:
        The callable to remove from the get_object hook registry. A
        callable that is not registered is silently ignored. If None (the
        default), all registered hooks are removed.
    :type get_object_callable: callable, optional
    """
    if get_object_callable is None:
        cls._get_object_hook.clear()
        return
    try:
        cls._get_object_hook.remove(get_object_callable)
    except ValueError:  # Pass if get_object_callable is not in list.
        pass
[docs]
@classmethod
def _bind_class_state(cls, state_dict):
"""
Bind the state contained in state_dict to ResourceIdentifier class.
"""
cls._parent_id_tree = state_dict['parent_id_tree']
cls._id_order = state_dict['id_order']
cls._id_object_map = state_dict['id_object_map']
cls._get_referred_object_hook = state_dict['get_object_hook']
[docs]
@classmethod
@contextmanager
def _debug_class_state(cls):
"""
Context manager for debugging the class level state for Resource Ids.
Replaces the current resource_id and unbound mappings returning
a dictionary with the new mappings as values and "rdict", and
"unbound" as keys. This function restores original mappings upon exit.
"""
# get current class state
old_state = dict(
parent_id_tree=cls._parent_id_tree,
id_order=cls._id_order,
id_object_map=cls._id_object_map,
get_object_hook=cls._get_object_hook,
)
# init new class state
new_state = dict(
parent_id_tree=WeakKeyDictionary(),
id_order=WeakKeyDictionary(),
id_object_map=WeakValueDictionary(),
get_object_hook=[],
)
# bind new state and return dict
cls._bind_class_state(new_state)
yield new_state
# reset prior state
cls._bind_class_state(old_state)
if __name__ == '__main__':
    # Run the doctests of this module when it is executed as a script.
    from doctest import testmod
    testmod(exclude_empty=True)