Source code for deepmd.nvnmd.utils.network

# SPDX-License-Identifier: LGPL-3.0-or-later
import logging

import numpy as np

from deepmd.env import (
    GLOBAL_TF_FLOAT_PRECISION,
    op_module,
    tf,
)
from deepmd.nvnmd.utils.config import (
    nvnmd_cfg,
)
from deepmd.nvnmd.utils.weight import (
    get_constant_initializer,
)
from deepmd.utils.network import (
    variable_summaries,
)

log = logging.getLogger(__name__)


def get_sess():
    init_op = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init_op)
    return sess


def matmul2_qq(a, b, nbit):
    r"""Quantized matmul operation for 2d tensor.

    `a` and `b` are the input tensors; `nbit` is the quantification precision.
    """
    sh_a = a.get_shape().as_list()
    sh_b = b.get_shape().as_list()
    a = tf.reshape(a, [-1, 1, sh_a[1]])
    b = tf.reshape(tf.transpose(b), [1, sh_b[1], sh_b[0]])
    y = a * b
    y = qf(y, nbit)
    y = tf.reduce_sum(y, axis=2)
    return y


def matmul3_qq(a, b, nbit):
    r"""Quantized matmul operation for 3d tensor.

    `a` and `b` are the input tensors; `nbit` is the quantification precision.
    """
    sh_a = a.get_shape().as_list()
    sh_b = b.get_shape().as_list()
    a = tf.reshape(a, [-1, sh_a[1], 1, sh_a[2]])
    b = tf.reshape(tf.transpose(b, [0, 2, 1]), [-1, 1, sh_b[2], sh_b[1]])
    y = a * b
    if nbit != -1:
        y = qf(y, nbit)
    y = tf.reduce_sum(y, axis=3)
    return y


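# Editor's note (illustrative, not part of the original module): matmul2_qq and
# matmul3_qq implement matrix multiplication as an explicit broadcasted
# elementwise product followed by a reduce_sum, so that the partial products can
# be quantized with `qf` before accumulation. For matmul2_qq, an input of shape
# [N, K] and a weight of shape [K, M] are reshaped to [N, 1, K] and [1, M, K];
# their product has shape [N, M, K], and summing over the last axis yields the
# usual [N, M] matmul result, with each partial product first floored onto the
# `nbit` fixed-point grid.

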
def qf(x, nbit):
    r"""Quantize and floor tensor `x` with quantification precision `nbit`."""
    prec = 2**nbit
    y = tf.floor(x * prec) / prec
    y = x + tf.stop_gradient(y - x)
    return y


def qr(x, nbit):
    r"""Quantize and round tensor `x` with quantification precision `nbit`."""
    prec = 2**nbit
    y = tf.round(x * prec) / prec
    y = x + tf.stop_gradient(y - x)
    return y


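# Editor's note (illustrative, not part of the original module): with nbit = 4
# the fixed-point step is 2**-4 = 1/16, so for x = 0.30:
#   qf(0.30, 4) -> floor(0.30 * 16) / 16 = 4 / 16 = 0.25
#   qr(0.30, 4) -> round(0.30 * 16) / 16 = 5 / 16 = 0.3125
# The `x + tf.stop_gradient(y - x)` pattern is a straight-through estimator:
# the forward pass returns the quantized value y, while the backward pass
# propagates gradients as if the op were the identity on x.

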
def tanh4(x):
    with tf.name_scope("tanh4"):
        sign = tf.sign(x)
        xclp = tf.clip_by_value(x, -2, 2)
        xabs = tf.abs(xclp)
        y1 = (1.0 / 16.0) * tf.pow(xabs, 4) + (-1.0 / 4.0) * tf.pow(xabs, 3) + xabs
        y2 = y1 * sign
        return y2


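# Editor's note (illustrative, not part of the original module): tanh4 is a
# piecewise-polynomial, hardware-friendly surrogate for tanh,
#   tanh4(x) = sign(x) * (|x|**4 / 16 - |x|**3 / 4 + |x|)  for |x| <= 2,
# with the input clipped to [-2, 2] beforehand. At the clip point the
# polynomial evaluates to 16/16 - 8/4 + 2 = 1, so the function saturates at
# +/-1 like tanh.

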
def one_layer_wb(
    shape,
    outputs_size,
    bavg,
    stddev,
    precision,
    trainable,
    initial_variables,
    seed,
    uniform_seed,
    name,
):
    if nvnmd_cfg.restore_fitting_net:
        # initializer
        w_initializer = get_constant_initializer(nvnmd_cfg.weight, "matrix")
        b_initializer = get_constant_initializer(nvnmd_cfg.weight, "bias")
    else:
        w_initializer = tf.random_normal_initializer(
            stddev=stddev / np.sqrt(shape[1] + outputs_size),
            seed=seed if (seed is None or uniform_seed) else seed + 0,
        )
        b_initializer = tf.random_normal_initializer(
            stddev=stddev,
            mean=bavg,
            seed=seed if (seed is None or uniform_seed) else seed + 1,
        )
        if initial_variables is not None:
            w_initializer = tf.constant_initializer(initial_variables[name + "/matrix"])
            b_initializer = tf.constant_initializer(initial_variables[name + "/bias"])
    # variable
    w = tf.get_variable(
        "matrix",
        [shape[1], outputs_size],
        precision,
        w_initializer,
        trainable=trainable,
    )
    variable_summaries(w, "matrix")
    b = tf.get_variable(
        "bias", [outputs_size], precision, b_initializer, trainable=trainable
    )
    variable_summaries(b, "bias")
    return w, b


def one_layer_t(
    shape,
    outputs_size,
    bavg,
    stddev,
    precision,
    trainable,
    initial_variables,
    seed,
    uniform_seed,
    name,
):
    NTAVC = nvnmd_cfg.fitn["NTAVC"]
    if nvnmd_cfg.restore_fitting_net:
        t_initializer = get_constant_initializer(nvnmd_cfg.weight, "tweight")
    else:
        t_initializer = tf.random_normal_initializer(
            stddev=stddev / np.sqrt(NTAVC + outputs_size),
            seed=seed if (seed is None or uniform_seed) else seed + 0,
        )
        if initial_variables is not None:
            t_initializer = tf.constant_initializer(
                initial_variables[name + "/tweight"]
            )
    t = tf.get_variable(
        "tweight",
        [NTAVC, outputs_size],
        precision,
        t_initializer,
        trainable=trainable,
    )
    variable_summaries(t, "matrix")
    return t


def one_layer(
    inputs,
    outputs_size,
    activation_fn=tf.nn.tanh,
    precision=GLOBAL_TF_FLOAT_PRECISION,
    stddev=1.0,
    bavg=0.0,
    name="linear",
    reuse=None,
    seed=None,
    use_timestep=False,
    trainable=True,
    useBN=False,
    uniform_seed=False,
    initial_variables=None,
    mixed_prec=None,
    final_layer=False,
):
    r"""Build one layer with continuous or quantized value.

    Its weight and bias can be initialized with random or constant values.
    """
    # USE FOR NEW FITTINGNET
    is_layer = (nvnmd_cfg.version == 1) and ("layer_0" in name)
    with tf.variable_scope(name, reuse=reuse):
        if is_layer:
            t = one_layer_t(
                None,
                outputs_size,
                bavg,
                stddev,
                precision,
                trainable,
                initial_variables,
                seed,
                uniform_seed,
                name,
            )
            #
            NTAVC = nvnmd_cfg.fitn["NTAVC"]
            nd = inputs.get_shape().as_list()[1] - NTAVC
            inputs2 = tf.slice(inputs, [0, nd], [-1, NTAVC])
            inputs = tf.slice(inputs, [0, 0], [-1, nd])
        # w & b
        shape = inputs.get_shape().as_list()
        w, b = one_layer_wb(
            shape,
            outputs_size,
            bavg,
            stddev,
            precision,
            trainable,
            initial_variables,
            seed,
            uniform_seed,
            name,
        )
        if nvnmd_cfg.quantize_fitting_net:
            NBIT_DATA_FL = nvnmd_cfg.nbit["NBIT_FIT_DATA_FL"]
            NBIT_SHORT_FL = nvnmd_cfg.nbit["NBIT_FIT_SHORT_FL"]
            # w
            with tf.variable_scope("w", reuse=reuse):
                w = op_module.quantize_nvnmd(w, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1)
                w = tf.ensure_shape(w, [shape[1], outputs_size])
            # b
            with tf.variable_scope("b", reuse=reuse):
                b = op_module.quantize_nvnmd(b, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1)
                b = tf.ensure_shape(b, [outputs_size])
            # x
            with tf.variable_scope("x", reuse=reuse):
                x = op_module.quantize_nvnmd(inputs, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1)
                inputs = tf.ensure_shape(x, [None, shape[1]])
            # wx
            # normalize weight mode: 0 all | 1 column
            norm_mode = 0 if final_layer else 1
            wx = op_module.matmul_fitnet_nvnmd(
                inputs, w, NBIT_DATA_FL, NBIT_SHORT_FL, norm_mode
            )
            with tf.variable_scope("wx", reuse=reuse):
                wx = op_module.quantize_nvnmd(
                    wx, 1, NBIT_DATA_FL, NBIT_DATA_FL - 2, -1
                )
                wx = tf.ensure_shape(wx, [None, outputs_size])
            if is_layer:
                wx2 = tf.matmul(inputs2, t)
                with tf.variable_scope("wx2", reuse=reuse):
                    wx2 = op_module.quantize_nvnmd(
                        wx2, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1
                    )
                    wx2 = tf.ensure_shape(wx2, [None, outputs_size])
                wx = wx + wx2
            # wxb
            wxb = wx + b
            with tf.variable_scope("wxb", reuse=reuse):
                wxb = op_module.quantize_nvnmd(wxb, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1)
                wxb = tf.ensure_shape(wxb, [None, outputs_size])
            # actfun
            if activation_fn is not None:
                # set activation function as tanh4
                y = op_module.tanh4_flt_nvnmd(wxb)
            else:
                y = wxb
            with tf.variable_scope("actfun", reuse=reuse):
                y = op_module.quantize_nvnmd(y, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1)
                y = tf.ensure_shape(y, [None, outputs_size])
        else:
            if is_layer:
                hidden = tf.matmul(inputs, w) + tf.matmul(inputs2, t) + b
            else:
                hidden = tf.matmul(inputs, w) + b
            # set activation function as tanh4
            y = tanh4(hidden) if (activation_fn is not None) else hidden
        # 'reshape' is necessary:
        # the next layer needs the shape of the input tensor to build its weight
        y = tf.reshape(y, [-1, outputs_size])
        return y
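

# Editor's note: a minimal usage sketch, not part of the original module. It
# assumes a TF1-style graph, that `nvnmd_cfg` has already been populated from a
# model checkpoint elsewhere, and purely hypothetical layer sizes and names.
#
#     x = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 240])
#     h = one_layer(x, 128, activation_fn=tf.nn.tanh, name="layer_0")
#     e = one_layer(h, 1, activation_fn=None, name="final_layer", final_layer=True)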