Source code for syne_tune.optimizer.schedulers.searchers.bayesopt.gpautograd.gluon

# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
"""Gluon APIs for autograd"""
import threading
import warnings
import re
from collections import OrderedDict
import autograd.numpy as anp
from autograd.builtins import isinstance

__all__ = ["Block", "Parameter", "ParameterDict"]


def _indent(s_, numSpaces):
    """Indent string"""
    s = s_.split("\n")
    if len(s) == 1:
        return s_
    first = s.pop(0)
    s = [first] + [(numSpaces * " ") + line for line in s]
    s = "\n".join(s)
    return s


def shape_is_known(shape):
    """Check whether a shape is completely known with or without np semantics.
    Please see the doc of :func:`is_np_shape` for more details.
    """
    if shape is None:
        return False
    unknown_dim_size = -1
    if len(shape) == 0:
        return unknown_dim_size == -1
    for dim_size in shape:
        if dim_size == unknown_dim_size:
            return False
        assert (
            dim_size > unknown_dim_size
        ), "shape dimension size cannot be less than {}, while " "received {}".format(
            unknown_dim_size, dim_size
        )
    return True
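
# For illustration of the helper above (comment added here, behavior follows
# directly from the code): shape_is_known(None) is False,
# shape_is_known((16, 100)) is True, and shape_is_known((16, -1)) is False,
# since -1 marks an unknown dimension size.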


class Parameter:
    """A Container holding parameters (weights) of Blocks.

    :py:class:`Parameter` holds a copy of the parameter on each
    :py:class:`Context` after it is initialized with
    ``Parameter.initialize(...)``. If :py:attr:`grad_req` is not ``'null'``,
    it will also hold a gradient array on each :py:class:`Context`::

        x = np.zeros((16, 100))
        w = Parameter('fc_weight', shape=(16, 100), init=np.random.uniform)
        w.initialize()
        z = x + w.data()

    Parameters
    ----------
    name : str
        Name of this parameter.
    grad_req : {'write', 'add', 'null'}, default 'write'
        Specifies how to update gradient to grad arrays.

        - ``'write'`` means everytime gradient is written to grad
          :py:class:`NDArray`.
        - ``'add'`` means everytime gradient is added to the grad
          :py:class:`NDArray`. You need to manually call ``zero_grad()`` to
          clear the gradient buffer before each iteration when using this
          option.
        - ``'null'`` means gradient is not requested for this parameter.
          Gradient arrays will not be allocated.
    shape : int or tuple of int, default None
        Shape of this parameter. By default shape is not specified. Parameter
        with unknown shape can be used for :py:class:`Symbol` API, but ``init``
        will throw an error when using :py:class:`NDArray` API.
    dtype : numpy.dtype or str, default 'float64'
        Data type of this parameter. For example, ``numpy.float64`` or
        ``'float64'``.
    lr_mult : float, default 1.0
        Learning rate multiplier. Learning rate will be multiplied by lr_mult
        when updating this parameter with an optimizer.
    wd_mult : float, default 1.0
        Weight decay multiplier (L2 regularizer coefficient). Works similar to
        lr_mult.
    init : Initializer, default None
        Initializer of this parameter. Will use the global initializer by
        default.
    stype : {'default', 'row_sparse', 'csr'}, default 'default'
        The storage type of the parameter.
    grad_stype : {'default', 'row_sparse', 'csr'}, default 'default'
        The storage type of the parameter's gradient.

    Attributes
    ----------
    grad_req : {'write', 'add', 'null'}
        This can be set before or after initialization. Setting ``grad_req``
        to ``'null'`` with ``x.grad_req = 'null'`` saves memory and computation
        when you don't need gradient w.r.t x.
    lr_mult : float
        Local learning rate multiplier for this Parameter. The actual learning
        rate is calculated with ``learning_rate * lr_mult``. You can set it
        with ``param.lr_mult = 2.0``.
    wd_mult : float
        Local weight decay multiplier for this Parameter.
    """

    def __init__(
        self,
        name,
        grad_req="write",
        shape=None,
        dtype=anp.float64,
        lr_mult=1.0,
        wd_mult=1.0,
        init=None,
        allow_deferred_init=False,
        differentiable=True,
        stype="default",
        grad_stype="default",
    ):
        self._var = None
        self._data = None
        self._grad = None
        self._ctx_list = None
        self._ctx_map = None
        self._trainer = None
        self._deferred_init = ()
        self._differentiable = differentiable
        if allow_deferred_init:
            raise NotImplementedError(
                "allow_deferred_init is not a valid option in autograd"
            )
        self._allow_deferred_init = allow_deferred_init
        self._grad_req = None
        if isinstance(shape, int):
            shape = (shape,)
        self._shape = shape
        self.name = name
        self._dtype = dtype
        self.lr_mult = lr_mult
        self.wd_mult = wd_mult
        self.grad_req = grad_req
        self.init = init
        # sparse related storage type information
        valid_stypes = ["default"]
        assert grad_stype in valid_stypes, (
            "grad_stype for Parameter '%s' must be "
            "one of 'default', 'row_sparse', or 'csr', but got '%s'"
            % (name, grad_stype)
        )
        assert stype in valid_stypes, (
            "stype for Parameter '%s' must be "
            "one of 'default', 'row_sparse', or 'csr', but got '%s'" % (name, stype)
        )
        self._grad_stype = grad_stype
        self._stype = stype

    def __repr__(self):
        s = "Parameter {name} (shape={shape}, dtype={dtype})"
        return s.format(name=self.name, shape=self.shape, dtype=self.dtype)

    @property
    def grad_req(self):
        return self._grad_req

    @grad_req.setter
    def grad_req(self, req):
        assert req in ["write", "add", "null"], (
            "grad_req must be one of 'write', 'add', or 'null', but got '%s'" % req
        )
        if not self._differentiable:
            req = "null"
        if self._grad_req == req:
            return
        self._grad_req = req
        if req == "null" and self._grad is not None:
            self._grad = None
            self._data = [i.detach() for i in self._data]
        elif self._data is not None:
            self._init_grad()

    @property
    def dtype(self):
        """The type of the parameter.

        Setting the dtype value is equivalent to casting the value of the
        parameter.
        """
        return self._dtype

    @dtype.setter
    def dtype(self, dtype):
        self.cast(dtype)

    @property
    def shape(self):
        """The shape of the parameter.

        By default, an unknown dimension size is 0. However, when the NumPy
        semantic is turned on, unknown dimension size is -1.
        """
        if self._shape is None:
            return None
        else:
            # Parameters shouldn't be zero-size. If one of its dimension is 0,
            # it means the parameter isn't initialized. In the NumPy semantics,
            # the unknown dimension should be marked with -1.
            return tuple(i if i != 0 else -1 for i in self._shape)

    @shape.setter
    def shape(self, new_shape):
        if self._shape is None:
            self._shape = new_shape
            return
        assert len(self._shape) == len(new_shape) and all(
            j in (-1, 0, i) for i, j in zip(new_shape, self._shape)
        ), "Expected shape %s is incompatible with given shape %s." % (
            str(new_shape),
            str(self._shape),
        )
        # -1 means unknown dim size in np_shape mode
        self._shape = new_shape

    def _check_and_get(self, arr_list, ctx):
        if arr_list is not None:
            if ctx is list:
                return arr_list
            if ctx is None:
                if len(arr_list) == 1:
                    return arr_list[0]
                # else:
                #     ctx = context.current_context()
            ctx_list = self._ctx_map[ctx.device_typeid & 1]
            if ctx.device_id < len(ctx_list):
                idx = ctx_list[ctx.device_id]
                if idx is not None:
                    return arr_list[idx]
            raise RuntimeError(
                "Parameter '%s' was not initialized on context %s. "
                "It was only initialized on %s."
                % (self.name, str(ctx), str(self._ctx_list))
            )
        if self._deferred_init:
            raise NotImplementedError("Cannot enable deferred init")
        raise RuntimeError(
            "Parameter '%s' has not been initialized. Note that "
            "you should initialize parameters and create Trainer "
            "with Block.collect_params() instead of Block.params "
            "because the latter does not include Parameters of "
            "nested child Blocks" % (self.name)
        )

    def _init_impl(self, data, ctx_list=None):
        """Sets data and grad."""
        self._data = [data]
        self._init_grad()

    def _init_grad(self):
        """Initialize grad buffers."""
        if self.grad_req == "null":
            self._grad = None
            return
        if self._grad_stype != "default":
            raise ValueError(
                "numpy.zeros does not support stype = {}".format(self._grad_stype)
            )
        self._grad = [anp.zeros(shape=i.shape, dtype=i.dtype) for i in self._data]
        # autograd.mark_variables(self._check_and_get(self._data, list),
        #                         self._grad, self.grad_req)
    def initialize(self, init=None, ctx=None, default_init=None, force_reinit=False):
        """Initializes parameter and gradient arrays. Only used for
        :py:class:`NDArray` API.

        Parameters
        ----------
        init : Initializer
            The initializer to use. Overrides :py:meth:`Parameter.init` and
            default_init.
        ctx : Context or list of Context, defaults to
            :py:meth:`context.current_context()`.
            Initialize Parameter on given context. If ctx is a list of
            Context, a copy will be made for each context.

            .. note::
                Copies are independent arrays. User is responsible for keeping
                their values consistent when updating. Normally
                :py:class:`gluon.Trainer` does this for you.

        default_init : Initializer
            Default initializer is used when both :py:func:`init` and
            :py:meth:`Parameter.init` are ``None``.
        force_reinit : bool, default False
            Whether to force re-initialization if parameter is already
            initialized.

        Examples
        --------
        >>> weight = mx.gluon.Parameter('weight', shape=(2, 2))
        >>> weight.initialize(ctx=mx.cpu(0))
        >>> weight.data()
        [[-0.01068833  0.01729892]
         [ 0.02042518 -0.01618656]]
        <NDArray 2x2 @cpu(0)>
        >>> weight.grad()
        [[ 0.  0.]
         [ 0.  0.]]
        <NDArray 2x2 @cpu(0)>
        >>> weight.initialize(ctx=[mx.gpu(0), mx.gpu(1)])
        >>> weight.data(mx.gpu(0))
        [[-0.00873779 -0.02834515]
         [ 0.05484822 -0.06206018]]
        <NDArray 2x2 @gpu(0)>
        >>> weight.data(mx.gpu(1))
        [[-0.00873779 -0.02834515]
         [ 0.05484822 -0.06206018]]
        <NDArray 2x2 @gpu(1)>
        """
        if default_init is None:
            default_init = anp.random.uniform
        if self._data is not None and not force_reinit:
            warnings.warn(
                "Parameter '%s' is already initialized, ignoring. "
                "Set force_reinit=True to re-initialize." % self.name,
                stacklevel=2,
            )
            return
        self._data = self._grad = None
        # init -> self.init -> default_init
        if init is None:
            init = default_init if self.init is None else self.init
        if not shape_is_known(self.shape):
            if self._allow_deferred_init:
                raise NotImplementedError("deferred_init not implemented for autograd")
            raise ValueError(
                "Cannot initialize Parameter '%s' because it has "
                "invalid shape: %s." % (self.name, str(self.shape))
            )
        try:
            data = init(shape=self.shape)
        except TypeError:
            data = init(size=self.shape)
        self._init_impl(data, ctx_list=ctx)
    def reset_ctx(self, ctx):
        """Re-assign Parameter to other contexts.

        Parameters
        ----------
        ctx : Context or list of Context, default ``context.current_context()``.
            Assign Parameter to given context. If ctx is a list of Context, a
            copy will be made for each context.
        """
        return

    def set_data(self, data):
        """Sets this parameter's value on all contexts."""
        self.shape = data.shape
        if self._data is None:
            assert self._deferred_init, (
                "Parameter '%s' has not been initialized" % self.name
            )
            self._deferred_init = self._deferred_init[:3] + (data,)
            return
        # self._check_and_get(self._data, list)  # added, raise no initialization error
        # for arr in self._check_and_get(self._data, list):
        #     arr[:] = data
        for i in range(len(self._data)):
            self._data[i] = anp.array(data, copy=True)

    def data(self, ctx=None):
        """Returns a copy of this parameter on one context. Must have been
        initialized on this context before. For sparse parameters, use
        :py:meth:`Parameter.row_sparse_data` instead.

        Parameters
        ----------
        ctx : Context
            Desired context.

        Returns
        -------
        NDArray on ctx
        """
        if self._stype != "default":
            raise RuntimeError(
                "Cannot return a copy of Parameter '%s' on ctx %s via data() "
                "because its storage type is %s. Please use row_sparse_data() "
                "instead." % (self.name, str(ctx), self._stype)
            )
        return self._check_and_get(self._data, ctx)

    def list_data(self):
        """Returns copies of this parameter on all contexts, in the same order
        as creation. For sparse parameters, use
        :py:meth:`Parameter.list_row_sparse_data` instead.

        Returns
        -------
        list of NDArrays
        """
        if self._stype != "default":
            raise RuntimeError(
                "Cannot return copies of Parameter '%s' on all contexts via "
                "list_data() because its storage type is %s. Please use "
                "row_sparse_data() instead." % (self.name, self._stype)
            )
        return self._check_and_get(self._data, list)

    def grad(self, ctx=None):
        """Returns a gradient buffer for this parameter on one context.

        Parameters
        ----------
        ctx : Context
            Desired context.
        """
        if self._data is not None and self._grad is None:
            raise RuntimeError(
                "Cannot get gradient array for Parameter '%s' "
                "because grad_req='null'" % (self.name)
            )
        return self._check_and_get(self._grad, ctx)

    def list_grad(self):
        """Returns gradient buffers on all contexts, in the same order as
        :py:meth:`values`."""
        if self._data is not None and self._grad is None:
            raise RuntimeError(
                "Cannot get gradient array for Parameter '%s' "
                "because grad_req='null'" % (self.name)
            )
        return self._check_and_get(self._grad, list)

    def list_ctx(self):
        """Returns a list of contexts this parameter is initialized on."""
        if self._data is None:
            if self._deferred_init:
                return self._deferred_init[1]
            raise RuntimeError("Parameter '%s' has not been initialized" % self.name)
        return self._ctx_list

    def zero_grad(self):
        """Sets gradient buffer on all contexts to 0. No action is taken if
        parameter is uninitialized or doesn't require gradient."""
        if self._grad is None:
            return
        for i in self._grad:
            i[:] = 0

    def cast(self, dtype):
        """Cast data and gradient of this Parameter to a new data type.

        Parameters
        ----------
        dtype : str or numpy.dtype
            The new data type.
        """
        self._dtype = dtype
        if self._data is None:
            return
        self._data = [i.astype(dtype) for i in self._data]
        if self._grad is None:
            return
        self._grad = [i.astype(dtype) for i in self._grad]
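
# A minimal sketch of the typical Parameter lifecycle with this autograd
# backend (variable names below are illustrative, not part of the module).
# Kept as a comment so that importing the module has no side effects.
#
#     w = Parameter("fc_weight", shape=(16, 100))
#     w.initialize()          # falls back to anp.random.uniform(size=shape)
#     value = w.data()        # the underlying autograd.numpy array
#     grads = w.list_grad()   # zero-initialized, since grad_req='write'
#     w.zero_grad()           # resets the gradient buffer in place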
class ParameterDict:
    """A dictionary managing a set of parameters.

    Parameters
    ----------
    prefix : str, default ``''``
        The prefix to be prepended to all Parameters' names created by this
        dict.
    shared : ParameterDict or None
        If not ``None``, when this dict's :py:meth:`get` method creates a new
        parameter, will first try to retrieve it from "shared" dict. Usually
        used for sharing parameters with another Block.
    """

    def __init__(self, prefix="", shared=None):
        self._prefix = prefix
        self._params = OrderedDict()
        self._shared = shared

    def __repr__(self):
        s = "{name}(\n{content}\n)"
        name = self._prefix + " " if self._prefix else ""
        return s.format(
            name=name,
            content="\n".join([_indent(" {0}".format(v), 2) for v in self.values()]),
        )

    def __getitem__(self, key):
        return self._params[key]

    def __iter__(self):
        return iter(self._params)

    def items(self):
        return self._params.items()

    def keys(self):
        return self._params.keys()

    def values(self):
        return self._params.values()

    @property
    def prefix(self):
        """Prefix of this dict. It will be prepended to :py:class:`Parameter`s'
        name created with :py:func:`get`."""
        return self._prefix

    def _get_impl(self, name):
        if name in self._params:
            return self._params[name]
        if self._shared is not None and name in self._shared._params:
            self._params[name] = self._shared._params[name]
            return self._shared._params[name]
        return None

    def get(self, name, **kwargs):
        """Retrieves a :py:class:`Parameter` with name ``self.prefix+name``.
        If not found, :py:func:`get` will first try to retrieve it from
        "shared" dict. If still not found, :py:func:`get` will create a new
        :py:class:`Parameter` with key-word arguments and insert it to self.

        Parameters
        ----------
        name : str
            Name of the desired Parameter. It will be prepended with this
            dictionary's prefix.
        **kwargs : Dict[str, Any]
            The rest of key-word arguments for the created
            :py:class:`Parameter`.

        Returns
        -------
        Parameter
            The created or retrieved :py:class:`Parameter`.
        """
        name = self.prefix + name
        param = self._get_impl(name)
        if param is None:  # pylint: disable=too-many-nested-blocks
            param = Parameter(name, **kwargs)
            self._params[name] = param
        else:
            for k, v in kwargs.items():
                if hasattr(param, k) and getattr(param, k) is not None:
                    existing = getattr(param, k)
                    if k == "shape" and len(v) == len(existing):
                        inferred_shape = []
                        matched = True
                        for dim1, dim2 in zip(v, existing):
                            if dim1 != dim2 and dim1 > 0 and dim2 > 0:
                                matched = False
                                break
                            elif dim1 == dim2:
                                inferred_shape.append(dim1)
                            elif dim1 in (
                                0,
                                -1,
                            ):  # -1 means unknown dim size in np_shape mode
                                inferred_shape.append(dim2)
                            else:
                                inferred_shape.append(dim1)
                        if matched:
                            param._shape = tuple(inferred_shape)
                            continue
                    elif k == "dtype" and anp.dtype(v) == anp.dtype(existing):
                        continue
                    assert v is None or v == existing, (
                        "Cannot retrieve Parameter '%s' because desired attribute "
                        "does not match with stored for attribute '%s': "
                        "desired '%s' vs stored '%s'."
                        % (name, k, str(v), str(getattr(param, k)))
                    )
                else:
                    setattr(param, k, v)
        return param

    def update(self, other):
        """Copies all Parameters in ``other`` to self."""
        for k, v in other.items():
            if k in self._params:
                assert self._params[k] is v, (
                    "Cannot update self with other because they have different "
                    "Parameters with the same name '%s'" % k
                )
        for k, v in other.items():
            self._params[k] = v

    def initialize(self, init=None, ctx=None, verbose=False, force_reinit=False):
        """Initializes all Parameters managed by this dictionary to be used for
        :py:class:`NDArray` API. It has no effect when using
        :py:class:`Symbol` API.

        Parameters
        ----------
        init : Initializer
            Global default Initializer to be used when :py:meth:`Parameter.init`
            is ``None``. Otherwise, :py:meth:`Parameter.init` takes precedence.
        ctx : Context or list of Context
            Keeps a copy of Parameters on one or many context(s).
        verbose : bool, default False
            Whether to verbosely print out details on initialization.
        force_reinit : bool, default False
            Whether to force re-initialization if parameter is already
            initialized.
        """
        if init is None:
            init = anp.random.uniform
        if verbose:
            init.set_verbosity(verbose=verbose)
        for _, v in self.items():
            v.initialize(
                init=None, ctx=ctx, default_init=init, force_reinit=force_reinit
            )

    def reset_ctx(self, ctx):
        """Re-assign all Parameters to other contexts.

        Parameters
        ----------
        ctx : Context or list of Context, default :py:meth:`context.current_context()`.
            Assign Parameter to given context. If ctx is a list of Context, a
            copy will be made for each context.
        """
        for i in self.values():
            i.reset_ctx(ctx)

    def list_ctx(self):
        """Returns a list of all the contexts on which the underlying
        Parameters are initialized."""
        s = set()
        for i in self.values():
            s.update(i.list_ctx())
        return list(s)

    def setattr(self, name, value):
        """Set an attribute to a new value for all Parameters.

        For example, set grad_req to null if you don't need gradient w.r.t a
        model's Parameters::

            model.collect_params().setattr('grad_req', 'null')

        or change the learning rate multiplier::

            model.collect_params().setattr('lr_mult', 0.5)

        Parameters
        ----------
        name : str
            Name of the attribute.
        value : valid type for attribute name
            The new value for the attribute.
        """
        for i in self.values():
            setattr(i, name, value)
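
# A minimal sketch of how ``ParameterDict.get`` creates and shares parameters
# (the names and shapes below are made up for illustration). Kept as a comment
# so the module has no import-time side effects.
#
#     params = ParameterDict(prefix="model_")
#     w = params.get("weight", shape=(3, 2))   # creates Parameter "model_weight"
#     assert params.get("weight") is w         # second lookup returns the same object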
class NameManager:
    """NameManager to do automatic naming.

    Developers can also inherit from this class to change naming behavior.
    """

    _current = threading.local()

    def __init__(self):
        self._counter = {}
        self._old_manager = None

    def get(self, name, hint):
        """Get the canonical name for a symbol.

        This is the default implementation. If the user specifies a name, the
        user-specified name will be used.

        When user does not specify a name, we automatically generate a name
        based on the hint string.

        Parameters
        ----------
        name : str or None
            The name specified by the user.
        hint : str
            A hint string, which can be used to generate name.

        Returns
        -------
        full_name : str
            A canonical name for the symbol.
        """
        if name:
            return name
        if hint not in self._counter:
            self._counter[hint] = 0
        name = "%s%d" % (hint, self._counter[hint])
        self._counter[hint] += 1
        return name

    def __enter__(self):
        if not hasattr(NameManager._current, "value"):
            NameManager._current.value = NameManager()
        self._old_manager = NameManager._current.value
        NameManager._current.value = self
        return self

    def __exit__(self, ptype, value, trace):
        assert self._old_manager
        NameManager._current.value = self._old_manager


class Prefix(NameManager):
    """A name manager that attaches a prefix to all names.

    Examples
    --------
    >>> import mxnet as mx
    >>> data = mx.symbol.Variable('data')
    >>> with mx.name.Prefix('mynet_'):
    ...     net = mx.symbol.FullyConnected(data, num_hidden=10, name='fc1')
    >>> net.list_arguments()
    ['data', 'mynet_fc1_weight', 'mynet_fc1_bias']
    """

    def __init__(self, prefix):
        super(Prefix, self).__init__()
        self._prefix = prefix

    def get(self, name, hint):
        name = super(Prefix, self).get(name, hint)
        return self._prefix + name


# initialize the default name manager
NameManager._current.value = NameManager()


class _BlockScope:
    """Scope for collecting child `Block` s."""

    _current = threading.local()

    def __init__(self, block):
        self._block = block
        self._counter = {}
        self._old_scope = None
        self._name_scope = None

    @staticmethod
    def create(prefix, params, hint):
        """Creates prefix and params for new `Block`."""
        current = getattr(_BlockScope._current, "value", None)
        if current is None:
            if prefix is None:
                if not hasattr(NameManager._current, "value"):
                    NameManager._current.value = NameManager()
                prefix = NameManager._current.value.get(None, hint) + "_"
            if params is None:
                params = ParameterDict(prefix)
            else:
                params = ParameterDict(params.prefix, params)
            return prefix, params

        if prefix is None:
            count = current._counter.get(hint, 0)
            prefix = "%s%d_" % (hint, count)
            current._counter[hint] = count + 1
        if params is None:
            parent = current._block.params
            params = ParameterDict(parent.prefix + prefix, parent._shared)
        else:
            params = ParameterDict(params.prefix, params)
        return current._block.prefix + prefix, params

    def __enter__(self):
        if self._block._empty_prefix:
            return self
        self._old_scope = getattr(_BlockScope._current, "value", None)
        _BlockScope._current.value = self
        self._name_scope = Prefix(self._block.prefix)
        self._name_scope.__enter__()
        return self

    def __exit__(self, ptype, value, trace):
        if self._block._empty_prefix:
            return
        self._name_scope.__exit__(ptype, value, trace)
        self._name_scope = None
        _BlockScope._current.value = self._old_scope
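
# How the automatic naming above behaves, sketched as a comment (the hint and
# class names are hypothetical): NameManager().get(None, "dense") yields
# "dense0", a second call yields "dense1", and Prefix("mynet_").get(None,
# "dense") yields "mynet_dense0". _BlockScope.create combines this with the
# parent Block's prefix, producing parameter names such as
# "model0_dense0_weight".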
class Block:
    """Base class for all neural network layers and models. Your models should
    subclass this class.

    :py:class:`Block` can be nested recursively in a tree structure. You can
    create and assign child :py:class:`Block` as regular attributes::

        from mxnet.gluon import Block, nn
        from mxnet import ndarray as F

        class Model(Block):
            def __init__(self, **kwargs):
                super(Model, self).__init__(**kwargs)
                # use name_scope to give child Blocks appropriate names.
                with self.name_scope():
                    self.dense0 = nn.Dense(20)
                    self.dense1 = nn.Dense(20)

            def forward(self, x):
                x = F.relu(self.dense0(x))
                return F.relu(self.dense1(x))

        model = Model()
        model.initialize(ctx=mx.cpu(0))
        model(F.zeros((10, 10), ctx=mx.cpu(0)))

    Child :py:class:`Block` assigned this way will be registered and
    :py:meth:`collect_params` will collect their Parameters recursively. You
    can also manually register child blocks with :py:meth:`register_child`.

    Parameters
    ----------
    prefix : str
        Prefix acts like a name space. All children blocks created in parent
        block's :py:meth:`name_scope` will have parent block's prefix in their
        name. Please refer to the
        `naming tutorial </api/python/docs/tutorials/packages/gluon/blocks/naming.html>`__
        for more info on prefix and naming.
    params : ParameterDict or None
        :py:class:`ParameterDict` for sharing weights with the new
        :py:class:`Block`. For example, if you want ``dense1`` to share
        ``dense0``'s weights, you can do::

            dense0 = nn.Dense(20)
            dense1 = nn.Dense(20, params=dense0.collect_params())
    """

    def __init__(self, prefix=None, params=None):
        self._empty_prefix = prefix == ""
        self._prefix, self._params = _BlockScope.create(prefix, params, self._alias())
        self._name = self._prefix[:-1] if self._prefix.endswith("_") else self._prefix
        self._scope = _BlockScope(self)
        self._children = OrderedDict()
        self._reg_params = {}
        self._forward_hooks = OrderedDict()
        self._forward_pre_hooks = OrderedDict()

    def __repr__(self):
        s = "{name}(\n{modstr}\n)"
        modstr = "\n".join(
            [
                " ({key}): {block}".format(key=key, block=_indent(block.__repr__(), 2))
                for key, block in self.__dict__.items()
                if isinstance(block, Block)
            ]
        )
        return s.format(name=self.__class__.__name__, modstr=modstr)

    def __setattr__(self, name, value):
        """Registers parameters."""
        if hasattr(self, name):
            existing = getattr(self, name)
            if isinstance(existing, (Parameter, Block)) and not isinstance(
                value, type(existing)
            ):
                raise TypeError(
                    "Changing attribute type for {name} from {type1} to {type2} "
                    "is not allowed.".format(
                        name=name, type1=type(existing), type2=type(value)
                    )
                )
        if isinstance(value, Block):
            self.register_child(value, name)
        elif isinstance(value, Parameter):
            assert name not in self._reg_params, (
                "Overriding Parameter attribute %s is not allowed. "
                "If you want to share parameters between blocks, please set "
                "'params' at Block construction instead." % name
            )
            self._reg_params[name] = value
        super(Block, self).__setattr__(name, value)

    def _check_container_with_block(self):
        children = set(self._children.values())

        def _find_unregistered_block_in_container(data):
            # Find whether a nested container structure contains Blocks
            if isinstance(data, (list, tuple)):
                for ele in data:
                    if _find_unregistered_block_in_container(ele):
                        return True
                return False
            elif isinstance(data, dict):
                for _, v in data.items():
                    if _find_unregistered_block_in_container(v):
                        return True
                return False
            elif isinstance(data, Block):
                return data not in children
            else:
                return False

        for k, v in self.__dict__.items():
            if isinstance(v, (list, tuple, dict)) and not (
                k.startswith("__") or k == "_children"
            ):
                if _find_unregistered_block_in_container(v):
                    warnings.warn(
                        '"{name}" is an unregistered container with Blocks. '
                        "Note that Blocks inside the list, tuple or dict will not be "
                        "registered automatically. Make sure to register them using "
                        "register_child() or switching to "
                        "nn.Sequential/nn.HybridSequential instead. ".format(
                            name=self.__class__.__name__ + "." + k
                        ),
                        stacklevel=3,
                    )

    def _alias(self):
        return self.__class__.__name__.lower()

    @property
    def prefix(self):
        """Prefix of this :py:class:`Block`."""
        return self._prefix

    @property
    def name(self):
        """Name of this :py:class:`Block`, without '_' in the end."""
        return self._name
    def name_scope(self):
        """Returns a name space object managing a child :py:class:`Block` and
        parameter names. Should be used within a ``with`` statement::

            with self.name_scope():
                self.dense = nn.Dense(20)

        Please refer to the
        `naming tutorial </api/python/docs/tutorials/packages/gluon/blocks/naming.html>`__
        for more info on prefix and naming.
        """
        return self._scope

    @property
    def params(self):
        """Returns this :py:class:`Block`'s parameter dictionary (does not
        include its children's parameters)."""
        return self._params
    def collect_params(self, select=None):
        """Returns a :py:class:`ParameterDict` containing this
        :py:class:`Block`'s and all of its children's Parameters (the
        default), or a :py:class:`ParameterDict` restricted to the parameters
        whose names match the given regular expressions.

        For example, collect the specified parameters in
        ['conv1_weight', 'conv1_bias', 'fc_weight', 'fc_bias']::

            model.collect_params('conv1_weight|conv1_bias|fc_weight|fc_bias')

        or collect all parameters whose names end with 'weight' or 'bias',
        this can be done using regular expressions::

            model.collect_params('.*weight|.*bias')

        Parameters
        ----------
        select : str
            Regular expressions used to select parameters by name.

        Returns
        -------
        The selected :py:class:`ParameterDict`
        """
        # We need to check here because blocks inside containers are not supported.
        self._check_container_with_block()
        ret = ParameterDict(self._params.prefix)
        if not select:
            ret.update(self.params)
        else:
            pattern = re.compile(select)
            ret.update(
                {
                    name: value
                    for name, value in self.params.items()
                    if pattern.match(name)
                }
            )
        for cld in self._children.values():
            ret.update(cld.collect_params(select=select))
        return ret
    def _collect_params_with_prefix(self, prefix=""):
        if prefix:
            prefix += "."
        ret = {prefix + key: val for key, val in self._reg_params.items()}
        for name, child in self._children.items():
            ret.update(child._collect_params_with_prefix(prefix + name))
        return ret

    def register_child(self, block, name=None):
        """Registers block as a child of self. :py:class:`Block` s assigned to
        self as attributes will be registered automatically."""
        if name is None:
            name = str(len(self._children))
        self._children[name] = block

    # def register_forward_pre_hook(self, hook):
    #     r"""Registers a forward pre-hook on the block.
    #
    #     The hook function is called immediately before :func:`forward`.
    #     It should not modify the input or output.
    #
    #     Parameters
    #     ----------
    #     hook : callable
    #         The forward hook function of form `hook(block, input) -> None`.
    #
    #     Returns
    #     -------
    #     :class:`mxnet.gluon.utils.HookHandle`
    #     """
    #     handle = HookHandle()
    #     handle.attach(self._forward_pre_hooks, hook)
    #     return handle

    # def register_forward_hook(self, hook):
    #     r"""Registers a forward hook on the block.
    #
    #     The hook function is called immediately after :func:`forward`.
    #     It should not modify the input or output.
    #
    #     Parameters
    #     ----------
    #     hook : callable
    #         The forward hook function of form `hook(block, input, output) -> None`.
    #
    #     Returns
    #     -------
    #     :class:`mxnet.gluon.utils.HookHandle`
    #     """
    #     handle = HookHandle()
    #     handle.attach(self._forward_hooks, hook)
    #     return handle
    def apply(self, fn):
        r"""Applies ``fn`` recursively to every child block as well as self.

        Parameters
        ----------
        fn : callable
            Function to be applied to each submodule, of form `fn(block)`.

        Returns
        -------
        this block
        """
        for cld in self._children.values():
            cld.apply(fn)
        fn(self)
        return self

    def initialize(self, init=None, ctx=None, verbose=False, force_reinit=False):
        """Initializes :py:class:`Parameter` s of this :py:class:`Block` and
        its children. Equivalent to ``block.collect_params().initialize(...)``

        Parameters
        ----------
        init : Initializer
            Global default Initializer to be used when :py:meth:`Parameter.init`
            is ``None``. Otherwise, :py:meth:`Parameter.init` takes precedence.
        ctx : Context or list of Context
            Keeps a copy of Parameters on one or many context(s).
        verbose : bool, default False
            Whether to verbosely print out details on initialization.
        force_reinit : bool, default False
            Whether to force re-initialization if parameter is already
            initialized.
        """
        if init is None:
            init = anp.random.uniform
        self.collect_params().initialize(init, ctx, verbose, force_reinit)
    def hybridize(self, active=True, **kwargs):
        """Please refer to the description of ``HybridBlock.hybridize()``."""
        for cld in self._children.values():
            cld.hybridize(active, **kwargs)
    def cast(self, dtype):
        """Cast this Block to use another data type.

        Parameters
        ----------
        dtype : str or numpy.dtype
            The new data type.
        """
        for child in self._children.values():
            child.cast(dtype)
        for _, param in self.params.items():
            param.cast(dtype)

    def __call__(self, *args):
        """Calls forward. Only accepts positional arguments."""
        # for hook in self._forward_pre_hooks.values():
        #     hook(self, args)
        out = self.forward(*args)
        # for hook in self._forward_hooks.values():
        #     hook(self, args, out)
        # if _mx_npx.is_np_array():
        #     _check_all_np_ndarrays(out)
        return out

    def forward(self, *args):
        """Overrides to implement forward computation using
        :py:class:`NDArray`. Only accepts positional arguments.

        Parameters
        ----------
        *args : list of NDArray
            Input tensors.
        """
        raise NotImplementedError
    # pylint: disable= invalid-name
    def hybrid_forward(self, *args):
        return self(*args)
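
# Minimal end-to-end sketch (the block and names below are hypothetical, added
# for illustration only): a Block with a single Parameter, initialized with the
# default anp.random.uniform initializer and evaluated through __call__ ->
# forward. Guarded so that importing this module stays side-effect free.
if __name__ == "__main__":

    class _Affine(Block):
        def __init__(self, dim, **kwargs):
            super().__init__(**kwargs)
            with self.name_scope():
                # registered via __setattr__ and collected by collect_params()
                self.weight = self.params.get("weight", shape=(dim,))

        def forward(self, x):
            return anp.dot(x, self.weight.data())

    block = _Affine(dim=3)
    block.collect_params().initialize()
    print(block(anp.ones((2, 3))))  # projects a (2, 3) input to shape (2,)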