#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Non-geographical restrictions and constraints for the mass downloader.

:copyright:
    Lion Krischer (krischer@geophysik.uni-muenchen.de), 2014-2015
:license:
    GNU Lesser General Public License, Version 3
    (https://www.gnu.org/copyleft/lesser.html)
"""
import collections.abc
import obspy
class Restrictions(object):
    """
    Class storing non-domain restrictions for a query. This is best explained
    with two examples. See the list below for a more detailed explanation
    of the parameters. The first set of restrictions is useful for event
    based earthquake set queries.

    >>> import obspy
    >>> restrictions = Restrictions(
    ...     # Time window of the query. For a real event this would usually
    ...     # be derived from the origin time, e.g. 5 minutes before the
    ...     # event to one hour after it.
    ...     starttime=obspy.UTCDateTime(2012, 1, 1),
    ...     endtime=obspy.UTCDateTime(2012, 1, 2),
    ...     # You might not want to deal with gaps in the data.
    ...     reject_channels_with_gaps=True,
    ...     # And you might only want waveforms that have data for at least
    ...     # 95 % of the requested time span.
    ...     minimum_length=0.95,
    ...     # No two stations should be closer than 10 km to each other.
    ...     minimum_interstation_distance_in_m=10E3,
    ...     # Only HH or BH channels. If a station has HH channels,
    ...     # those will be downloaded, otherwise the BH. Nothing will be
    ...     # downloaded if it has neither.
    ...     channel_priorities=["HH[ZNE]", "BH[ZNE]"],
    ...     # Location codes are arbitrary and there is no rule as to which
    ...     # location is best.
    ...     location_priorities=["", "00", "10"])

    And the restrictions for downloading a noise data set might look similar
    to the following:

    >>> import obspy
    >>> restrictions = Restrictions(
    ...     # Get data for a whole year.
    ...     starttime=obspy.UTCDateTime(2012, 1, 1),
    ...     endtime=obspy.UTCDateTime(2013, 1, 1),
    ...     # Chunk it to have one file per day.
    ...     chunklength_in_sec=86400,
    ...     # Considering the enormous amount of data associated with
    ...     # continuous requests, you might want to limit the data based on
    ...     # SEED identifiers. If the location code is specified, the
    ...     # location priority list is not used; the same is true for the
    ...     # channel argument and priority list.
    ...     network="BW", station="A*", location="", channel="BH*",
    ...     # The typical use case for such a data set is noise correlation
    ...     # where gaps are dealt with at a later stage.
    ...     reject_channels_with_gaps=False,
    ...     # Same is true with the minimum length. Any data during a day
    ...     # might be useful.
    ...     minimum_length=0.0,
    ...     # Sanitize makes sure that each MiniSEED file also has an
    ...     # associated StationXML file, otherwise the MiniSEED files will
    ...     # be deleted afterwards. This is not desirable for large noise
    ...     # data sets.
    ...     sanitize=False,
    ...     # Guard against the same station having different names.
    ...     minimum_interstation_distance_in_m=100.0)

    The ``network``, ``station``, ``location``, and ``channel`` codes are
    directly passed to the `station` service of each fdsn-ws implementation
    and can thus take comma separated string lists as arguments, i.e.

    .. code-block:: python

        restrictions = Restrictions(
            ...
            network="BW,G?", station="A*,B*",
            ...
            )

    Not all fdsn-ws implementations support the direct exclusion of network
    or station codes. The ``exclude_networks`` and ``exclude_stations``
    arguments should thus be used for that purpose to ensure compatibility
    across all data providers, e.g.

    .. code-block:: python

        restrictions = Restrictions(
            ...
            network="B*,G*", station="A*, B*",
            exclude_networks=["BW", "GR"],
            exclude_stations=["AL??", "*O"],
            ...
            )

    It is also possible to restrict the downloaded stations to stations part
    of an existing inventory object which can originate from a StationXML
    file or from other sources. It will only keep stations that are part of
    the inventory object. Channels are still selected dynamically based on
    the other restrictions. Keep in mind that all other restrictions still
    apply - passing an inventory will just further restrict the possibly
    downloaded data.

    .. code-block:: python

        restrictions = Restrictions(
            ...
            limit_stations_to_inventory=inv,
            ...
            )

    :param starttime: The start time of the data to be downloaded.
    :type starttime: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param endtime: The end time of the data.
    :type endtime: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param station_starttime: The start time of the station files. If not
        given, the ``starttime`` argument will be used. This is useful when
        trying to incorporate multiple waveform datasets with a central
        station file archive as StationXML files can be downloaded once and
        for the whole time span.
    :type station_starttime: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param station_endtime: The end time of the station files. Analogous to
        the ``station_starttime`` argument.
    :type station_endtime: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param chunklength_in_sec: The length of one chunk in seconds. If set,
        the time between ``starttime`` and ``endtime`` will be divided into
        segments of ``chunklength_in_sec`` seconds. Useful for continuous
        data requests. Set to ``None`` if one piece of data is desired
        between ``starttime`` and ``endtime`` (the default). Must not be
        negative.
    :type chunklength_in_sec: float
    :param network: The network code. Can contain wildcards.
    :type network: str
    :param station: The station code. Can contain wildcards.
    :type station: str
    :param location: The location code. Can contain wildcards.
    :type location: str
    :param channel: The channel code. Can contain wildcards.
    :type channel: str
    :param exclude_networks: A list of potentially wildcarded networks that
        should not be downloaded.
    :type exclude_networks: list[str]
    :param exclude_stations: A list of potentially wildcarded stations that
        should not be downloaded.
    :type exclude_stations: list[str]
    :param limit_stations_to_inventory: If given, only stations part of
        this inventory object will be downloaded. All other restrictions
        still apply - this just serves to further limit the set of stations
        to download.
    :type limit_stations_to_inventory:
        :class:`~obspy.core.inventory.inventory.Inventory`
    :param reject_channels_with_gaps: If True (default), MiniSEED files with
        gaps and/or overlaps will be rejected.
    :type reject_channels_with_gaps: bool
    :param minimum_length: The minimum length of the data as a fraction of
        the requested time frame. After a channel has been downloaded it
        will be checked that its total length is at least that fraction of
        the requested time span. Will be rejected otherwise. Must be between
        ``0.0`` and ``1.0``, defaults to ``0.9``.
    :type minimum_length: float
    :param sanitize: Sanitize makes sure that each MiniSEED file also has an
        associated StationXML file, otherwise the MiniSEED files will be
        deleted afterwards. This is potentially not desirable for large
        noise data sets.
    :type sanitize: bool
    :param minimum_interstation_distance_in_m: The minimum inter-station
        distance. Data from any new station closer to any existing station
        will not be downloaded. Also used for duplicate station detection as
        sometimes stations have different names for different webservice
        providers. Defaults to `1000 m`.
    :type minimum_interstation_distance_in_m: float
    :param channel_priorities: Priority list for the channels. Will not be
        used if the ``channel`` argument is used.
    :type channel_priorities: list[str]
    :param location_priorities: Priority list for the locations. Will not be
        used if the ``location`` argument is used.
    :type location_priorities: list[str]

    :raises ValueError: If the station time span does not enclose the main
        time span, or if ``chunklength_in_sec`` is negative.
    :raises TypeError: If ``channel_priorities`` or ``location_priorities``
        is a string or not iterable.
    """
    # Default priority lists. They are defined once here so that the
    # ``__init__`` signature and the "are the location priorities still the
    # defaults?" check below can never drift apart.
    _DEFAULT_CHANNEL_PRIORITIES = (
        "HH[ZNE12]", "BH[ZNE12]", "MH[ZNE12]", "EH[ZNE12]", "LH[ZNE12]",
        "HL[ZNE12]", "BL[ZNE12]", "ML[ZNE12]", "EL[ZNE12]", "LL[ZNE12]",
        "SH[ZNE12]")
    _DEFAULT_LOCATION_PRIORITIES = (
        "", "00", "10", "01", "20", "02", "30", "03", "40", "04", "50",
        "05", "60", "06", "70", "07", "80", "08", "90", "09")

    # Instances are mutable and compared by value, so they are deliberately
    # unhashable (this is also what Python 3 does implicitly when only
    # ``__eq__`` is defined).
    __hash__ = None

    def __init__(self, starttime, endtime,
                 station_starttime=None, station_endtime=None,
                 chunklength_in_sec=None,
                 network=None, station=None, location=None, channel=None,
                 exclude_networks=tuple(), exclude_stations=tuple(),
                 limit_stations_to_inventory=None,
                 reject_channels_with_gaps=True, minimum_length=0.9,
                 sanitize=True, minimum_interstation_distance_in_m=1000,
                 channel_priorities=_DEFAULT_CHANNEL_PRIORITIES,
                 location_priorities=_DEFAULT_LOCATION_PRIORITIES):
        """
        Validate and store the restrictions. See the class docstring for a
        description of all parameters and of the exceptions raised.
        """
        # Keep track of whether or not the location priorities are equal to
        # the default values. Comparing against the class level constant
        # keeps the function signature intact.
        self._loc_prios_are_default_values = bool(
            location_priorities == self._DEFAULT_LOCATION_PRIORITIES)

        self.starttime = obspy.UTCDateTime(starttime)
        self.endtime = obspy.UTCDateTime(endtime)

        # Falsy values (e.g. ``None``) are stored unchanged; everything else
        # is converted to a UTCDateTime object.
        self.station_starttime = station_starttime and \
            obspy.UTCDateTime(station_starttime)
        self.station_endtime = station_endtime and \
            obspy.UTCDateTime(station_endtime)

        # The station time span, if given, has to enclose the waveform time
        # span.
        if self.station_starttime and self.station_starttime > self.starttime:
            raise ValueError("The station start time must be smaller than the "
                             "main start time.")
        if self.station_endtime and self.station_endtime < self.endtime:
            raise ValueError("The station end time must be larger than the "
                             "main end time.")

        self.chunklength = chunklength_in_sec and float(chunklength_in_sec)
        # A negative chunk length would make ``__iter__`` step backwards in
        # time and thus never reach the end time.
        if self.chunklength and self.chunklength < 0:
            raise ValueError("'chunklength_in_sec' must not be negative.")

        self.network = network
        self.station = station
        self.location = location
        self.channel = channel
        self.exclude_networks = exclude_networks
        self.exclude_stations = exclude_stations
        self.reject_channels_with_gaps = reject_channels_with_gaps
        self.minimum_length = minimum_length
        self.sanitize = bool(sanitize)

        # These must be iterables, but not strings.
        for name, value in (("channel_priorities", channel_priorities),
                            ("location_priorities", location_priorities)):
            if not isinstance(value, collections.abc.Iterable) \
                    or isinstance(value, str):
                raise TypeError("'%s' must be a list or other iterable "
                                "container." % name)
        self.channel_priorities = channel_priorities
        self.location_priorities = location_priorities

        self.minimum_interstation_distance_in_m = \
            float(minimum_interstation_distance_in_m)

        # Further restrict the possibly downloaded networks and stations to
        # the ones in the given inventory. Only the (network code, station
        # code) pairs are kept.
        if limit_stations_to_inventory is not None:
            self.limit_stations_to_inventory = {
                (net.code, sta.code)
                for net in limit_stations_to_inventory
                for sta in net}
        else:
            self.limit_stations_to_inventory = None

    def __eq__(self, other):
        """
        Two restriction objects are equal if all their attributes are equal.

        Comparison with any other type of object is delegated back to Python
        by returning ``NotImplemented`` which results in ``False`` for ``==``
        instead of an exception.
        """
        if not isinstance(other, Restrictions):
            return NotImplemented
        return self.__dict__ == other.__dict__

    def __ne__(self, other):
        """
        Inverse of :meth:`__eq__`.
        """
        return not self == other

    def __iter__(self):
        """
        Iterator yielding time intervals based on the chunklength and
        temporal settings.

        Each item is a ``(starttime, endtime)`` tuple. Without a chunk length
        a single interval spanning the whole time frame is yielded. Otherwise
        consecutive intervals of ``chunklength`` seconds are yielded with the
        last one being cut off at the end time.
        """
        if not self.chunklength:
            yield (self.starttime, self.endtime)
            return

        chunklength = self.chunklength
        endtime = self.endtime
        chunk_start = self.starttime
        while chunk_start < endtime:
            # Never extend the last chunk beyond the requested end time.
            yield (chunk_start, min(chunk_start + chunklength, endtime))
            chunk_start += chunklength
if __name__ == '__main__':
    # Run the doctest examples embedded in this module's docstrings when the
    # file is executed directly as a script.
    import doctest
    doctest.testmod(exclude_empty=True)