Source code for obspy.clients.fdsn.mass_downloader.restrictions

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Non-geographical restrictions and constraints for the mass downloader.

:copyright:
    Lion Krischer (krischer@geophysik.uni-muenchen.de), 2014-2015
:license:
    GNU Lesser General Public License, Version 3
    (https://www.gnu.org/copyleft/lesser.html)
"""
import collections.abc

import obspy


[docs] class Restrictions(object): """ Class storing non-domain restrictions for a query. This is best explained with two examples. See the list below for a more detailed explanation of the parameters. The first set of restrictions is useful for event based earthquake set queries. >>> import obspy >>> restrictions = Restrictions( ... # Get data from 5 minutes before the event to one hour after the ... # event. ... starttime=obspy.UTCDateTime(2012, 1, 1), ... endtime=obspy.UTCDateTime(2012, 1, 2), ... # You might not want to deal with gaps in the data. ... reject_channels_with_gaps=True, ... # And you might only want waveforms that have data for at least ... # 95 % of the requested time span. ... minimum_length=0.95, ... # No two stations should be closer than 10 km to each other. ... minimum_interstation_distance_in_m=10E3, ... # Only HH or BH channels. If a station has HH channels, ... # those will be downloaded, otherwise the BH. Nothing will be ... # downloaded if it has neither. ... channel_priorities=["HH[ZNE]", "BH[ZNE]"], ... # Location codes are arbitrary and there is no rule as to which ... # location is best. ... location_priorities=["", "00", "10"]) And the restrictions for downloading a noise data set might look similar to the following: >>> import obspy >>> restrictions = Restrictions( ... # Get data for a whole year. ... starttime=obspy.UTCDateTime(2012, 1, 1), ... endtime=obspy.UTCDateTime(2013, 1, 1), ... # Chunk it to have one file per day. ... chunklength_in_sec=86400, ... # Considering the enormous amount of data associated with ... # continuous requests, you might want to limit the data based on ... # SEED identifiers. If the location code is specified, the ... # location priority list is not used; the same is true for the ... # channel argument and priority list. ... network="BW", station="A*", location="", channel="BH*", ... # The typical use case for such a data set are noise correlations ... # where gaps are dealt with at a later stage. ... reject_channels_with_gaps=False, ... # Same is true with the minimum length. Any data during a day ... # might be useful. ... minimum_length=0.0, ... # Sanitize makes sure that each MiniSEED file also has an ... # associated StationXML file, otherwise the MiniSEED files will ... # be deleted afterwards. This is not desirable for large noise ... # data sets. ... sanitize=False, ... # Guard against the same station having different names. ... minimum_interstation_distance_in_m=100.0) The ``network``, ``station``, ``location``, and ``channel`` codes are directly passed to the `station` service of each fdsn-ws implementation and can thus take comma separated string lists as arguments, i.e. .. code-block:: python restrictions = Restrictions( ... network="BW,G?", station="A*,B*", ... ) Not all fdsn-ws implementations support the direct exclusion of network or station codes. The ``exclude_networks`` and ``exclude_stations`` arguments should thus be used for that purpose to ensure compatibility across all data providers, e.g. .. code-block:: python restrictions = Restrictions( ... network="B*,G*", station="A*, B*", exclude_networks=["BW", "GR"], exclude_stations=["AL??", "*O"], ... ) It is also possible to restrict the downloaded stations to stations part of an existing inventory object which can originate from a StationXML file or from other sources. It will only keep stations that are part of the inventory object. Channels are still selected dynamically based on the other restrictions. Keep in mind that all other restrictions still apply - passing an inventory will just further restrict the possibly downloaded data. .. code-block:: python restrictions = Restrictions( ... limit_stations_to_inventory=inv, ... ) :param starttime: The start time of the data to be downloaded. :type starttime: :class:`~obspy.core.utcdatetime.UTCDateTime` :param endtime: The end time of the data. :type endtime: :class:`~obspy.core.utcdatetime.UTCDateTime` :param station_starttime: The start time of the station files. If not given, the ``starttime`` argument will be used. This is useful when trying to incorporate multiple waveform datasets with a central station file archive as StationXML files can be downloaded once and for the whole time span. :type station_starttime: :class:`~obspy.core.utcdatetime.UTCDateTime` :param station_endtime: The end time of the station files. Analogous to the ``station_starttime`` argument. :type station_endtime: :class:`~obspy.core.utcdatetime.UTCDateTime` :param chunklength_in_sec: The length of one chunk in seconds. If set, the time between ``starttime`` and ``endtime`` will be divided into segments of ``chunklength_in_sec`` seconds. Useful for continuous data requests. Set to ``None`` if one piece of data is desired between ``starttime`` and ``endtime`` (the default). :type chunklength_in_sec: float :param network: The network code. Can contain wildcards. :type network: str :param station: The station code. Can contain wildcards. :type station: str :param location: The location code. Can contain wildcards. :type location: str :param channel: The channel code. Can contain wildcards. :type channel: str :param exclude_networks: A list of potentially wildcarded networks that should not be downloaded. :type exclude_networks: list[str] :param exclude_stations: A list of potentially wildcarded stations that should not be downloaded. :type exclude_stations: list[str] :param limit_stations_to_inventory: If given, only stations part of the this inventory object will be downloaded. All other restrictions still apply - this just serves to further limit the set of stations to download. :type limit_stations_to_inventory: :class:`~obspy.core.inventory.inventory.Inventory` :param reject_channels_with_gaps: If True (default), MiniSEED files with gaps and/or overlaps will be rejected. :type reject_channels_with_gaps: bool :param minimum_length: The minimum length of the data as a fraction of the requested time frame. After a channel has been downloaded it will be checked that its total length is at least that fraction of the requested time span. Will be rejected otherwise. Must be between ``0.0`` and ``1.0``, defaults to ``0.9``. :type minimum_length: float :param sanitize: Sanitize makes sure that each MiniSEED file also has an associated StationXML file, otherwise the MiniSEED files will be deleted afterwards. This is potentially not desirable for large noise data sets. :type sanitize: bool :param minimum_interstation_distance_in_m: The minimum inter-station distance. Data from any new station closer to any existing station will not be downloaded. Also used for duplicate station detection as sometimes stations have different names for different webservice providers. Defaults to `1000 m`. :type minimum_interstation_distance_in_m: float :param channel_priorities: Priority list for the channels. Will not be used if the ``channel`` argument is used. :type channel_priorities: list[str] :param location_priorities: Priority list for the locations. Will not be used if the ``location`` argument is used. :type location_priorities: list[str] """
[docs] def __init__(self, starttime, endtime, station_starttime=None, station_endtime=None, chunklength_in_sec=None, network=None, station=None, location=None, channel=None, exclude_networks=tuple(), exclude_stations=tuple(), limit_stations_to_inventory=None, reject_channels_with_gaps=True, minimum_length=0.9, sanitize=True, minimum_interstation_distance_in_m=1000, channel_priorities=("HH[ZNE12]", "BH[ZNE12]", "MH[ZNE12]", "EH[ZNE12]", "LH[ZNE12]", "HL[ZNE12]", "BL[ZNE12]", "ML[ZNE12]", "EL[ZNE12]", "LL[ZNE12]", "SH[ZNE12]"), location_priorities=("", "00", "10", "01", "20", "02", "30", "03", "40", "04", "50", "05", "60", "06", "70", "07", "80", "08", "90", "09")): # Awkward logic to keep track whether or not the location priorities # are equal to the default values. This "solution" keeps the function # signature intact and it also located close to where the location # priorities are set. if location_priorities == ( "", "00", "10", "01", "20", "02", "30", "03", "40", "04", "50", "05", "60", "06", "70", "07", "80", "08", "90", "09"): self._loc_prios_are_default_values = True else: self._loc_prios_are_default_values = False self.starttime = obspy.UTCDateTime(starttime) self.endtime = obspy.UTCDateTime(endtime) self.station_starttime = station_starttime and \ obspy.UTCDateTime(station_starttime) self.station_endtime = station_endtime and \ obspy.UTCDateTime(station_endtime) if self.station_starttime and self.station_starttime > self.starttime: raise ValueError("The station start time must be smaller than the " "main start time.") if self.station_endtime and self.station_endtime < self.endtime: raise ValueError("The station end time must be larger than the " "main end time.") self.chunklength = chunklength_in_sec and float(chunklength_in_sec) self.network = network self.station = station self.location = location self.channel = channel self.exclude_networks = exclude_networks self.exclude_stations = exclude_stations self.reject_channels_with_gaps = reject_channels_with_gaps self.minimum_length = minimum_length self.sanitize = bool(sanitize) # These must be iterables, but not strings. if not isinstance(channel_priorities, collections.abc.Iterable) \ or isinstance(channel_priorities, str): msg = "'channel_priorities' must be a list or other iterable " \ "container." raise TypeError(msg) if not isinstance(location_priorities, collections.abc.Iterable) \ or isinstance(location_priorities, str): msg = "'location_priorities' must be a list or other iterable " \ "container." raise TypeError(msg) self.channel_priorities = channel_priorities self.location_priorities = location_priorities self.minimum_interstation_distance_in_m = \ float(minimum_interstation_distance_in_m) # Further restrict the possibly downloaded networks and station to # the one in the given inventory. if limit_stations_to_inventory is not None: self.limit_stations_to_inventory = set() for net in limit_stations_to_inventory: for sta in net: self.limit_stations_to_inventory.add((net.code, sta.code)) else: self.limit_stations_to_inventory = None
[docs] def __eq__(self, other): return self.__dict__ == other.__dict__
[docs] def __ne__(self, other): return not self == other
[docs] def __iter__(self): """ Iterator yielding time intervals based on the chunklength and temporal settings. """ if not self.chunklength: return iter([(self.starttime, self.endtime)]) def it(): """ Tiny iterator. """ starttime = self.starttime endtime = self.endtime chunklength = self.chunklength while starttime < endtime: yield (starttime, min(starttime + chunklength, endtime)) starttime += chunklength return return it()
if __name__ == '__main__': import doctest doctest.testmod(exclude_empty=True)