Source code for deepmd.nvnmd.utils.network

# SPDX-License-Identifier: LGPL-3.0-or-later
import logging

import numpy as np

from deepmd.env import (
    GLOBAL_TF_FLOAT_PRECISION,
    op_module,
    tf,
)
from deepmd.nvnmd.utils.config import (
    nvnmd_cfg,
)
from deepmd.nvnmd.utils.weight import (
    get_constant_initializer,
)
from deepmd.utils.network import (
    variable_summaries,
)

log = logging.getLogger(__name__)


def get_sess():
    init_op = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init_op)
    return sess


def matmul2_qq(a, b, nbit):
    r"""Quantized matmul operation for 2d tensor.

    `a` and `b` are the input tensors; `nbit` is the quantification precision.
    """
    sh_a = a.get_shape().as_list()
    sh_b = b.get_shape().as_list()
    a = tf.reshape(a, [-1, 1, sh_a[1]])
    b = tf.reshape(tf.transpose(b), [1, sh_b[1], sh_b[0]])
    y = a * b
    y = qf(y, nbit)
    y = tf.reduce_sum(y, axis=2)
    return y


def matmul3_qq(a, b, nbit):
    r"""Quantized matmul operation for 3d tensor.

    `a` and `b` are the input tensors; `nbit` is the quantification precision.
    """
    sh_a = a.get_shape().as_list()
    sh_b = b.get_shape().as_list()
    a = tf.reshape(a, [-1, sh_a[1], 1, sh_a[2]])
    b = tf.reshape(tf.transpose(b, [0, 2, 1]), [-1, 1, sh_b[2], sh_b[1]])
    y = a * b
    if nbit != -1:
        y = qf(y, nbit)
    y = tf.reduce_sum(y, axis=3)
    return y


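# Editor's note (illustrative, not part of the original module): matmul2_qq and
# matmul3_qq implement matrix multiplication as an explicit broadcasted
# elementwise product followed by a reduce_sum, so that the partial products can
# be quantized with `qf` before accumulation. For matmul2_qq, an input of shape
# [N, K] and a weight of shape [K, M] are reshaped to [N, 1, K] and [1, M, K];
# their product has shape [N, M, K], and summing over the last axis yields the
# usual [N, M] matmul result, with each partial product first floored onto the
# `nbit` fixed-point grid.

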
def qf(x, nbit):
    r"""Quantize and floor tensor `x` with quantification precision `nbit`."""
    prec = 2**nbit
    y = tf.floor(x * prec) / prec
    y = x + tf.stop_gradient(y - x)
    return y


def qr(x, nbit):
    r"""Quantize and round tensor `x` with quantification precision `nbit`."""
    prec = 2**nbit
    y = tf.round(x * prec) / prec
    y = x + tf.stop_gradient(y - x)
    return y


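# Editor's note (illustrative, not part of the original module): with nbit = 4
# the fixed-point step is 2**-4 = 1/16, so for x = 0.30:
#   qf(0.30, 4) -> floor(0.30 * 16) / 16 = 4 / 16 = 0.25
#   qr(0.30, 4) -> round(0.30 * 16) / 16 = 5 / 16 = 0.3125
# The `x + tf.stop_gradient(y - x)` pattern is a straight-through estimator:
# the forward pass returns the quantized value y, while the backward pass
# propagates gradients as if the op were the identity on x.

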
def tanh4(x):
    with tf.name_scope("tanh4"):
        sign = tf.sign(x)
        xclp = tf.clip_by_value(x, -2, 2)
        xabs = tf.abs(xclp)
        y1 = (1.0 / 16.0) * tf.pow(xabs, 4) + (-1.0 / 4.0) * tf.pow(xabs, 3) + xabs
        y2 = y1 * sign
        return y2


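# Editor's note (illustrative, not part of the original module): tanh4 is a
# piecewise-polynomial, hardware-friendly surrogate for tanh,
#   tanh4(x) = sign(x) * (|x|**4 / 16 - |x|**3 / 4 + |x|)  for |x| <= 2,
# with the input clipped to [-2, 2] beforehand. At the clip point the
# polynomial evaluates to 16/16 - 8/4 + 2 = 1, so the function saturates at
# +/-1 like tanh.

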
def one_layer_wb(
    shape,
    outputs_size,
    bavg,
    stddev,
    precision,
    trainable,
    initial_variables,
    seed,
    uniform_seed,
    name,
):
    if nvnmd_cfg.restore_fitting_net:
        # initializer
        w_initializer = get_constant_initializer(nvnmd_cfg.weight, "matrix")
        b_initializer = get_constant_initializer(nvnmd_cfg.weight, "bias")
    else:
        w_initializer = tf.random_normal_initializer(
            stddev=stddev / np.sqrt(shape[1] + outputs_size),
            seed=seed if (seed is None or uniform_seed) else seed + 0,
        )
        b_initializer = tf.random_normal_initializer(
            stddev=stddev,
            mean=bavg,
            seed=seed if (seed is None or uniform_seed) else seed + 1,
        )
        if initial_variables is not None:
            w_initializer = tf.constant_initializer(initial_variables[name + "/matrix"])
            b_initializer = tf.constant_initializer(initial_variables[name + "/bias"])
    # variable
    w = tf.get_variable(
        "matrix",
        [shape[1], outputs_size],
        precision,
        w_initializer,
        trainable=trainable,
    )
    variable_summaries(w, "matrix")
    b = tf.get_variable(
        "bias", [outputs_size], precision, b_initializer, trainable=trainable
    )
    variable_summaries(b, "bias")
    return w, b


def one_layer_t(
    shape,
    outputs_size,
    bavg,
    stddev,
    precision,
    trainable,
    initial_variables,
    seed,
    uniform_seed,
    name,
):
    NTAVC = nvnmd_cfg.fitn["NTAVC"]
    if nvnmd_cfg.restore_fitting_net:
        t_initializer = get_constant_initializer(nvnmd_cfg.weight, "tweight")
    else:
        t_initializer = tf.random_normal_initializer(
            stddev=stddev / np.sqrt(NTAVC + outputs_size),
            seed=seed if (seed is None or uniform_seed) else seed + 0,
        )
        if initial_variables is not None:
            t_initializer = tf.constant_initializer(
                initial_variables[name + "/tweight"]
            )
    t = tf.get_variable(
        "tweight",
        [NTAVC, outputs_size],
        precision,
        t_initializer,
        trainable=trainable,
    )
    variable_summaries(t, "matrix")
    return t


def one_layer(
    inputs,
    outputs_size,
    activation_fn=tf.nn.tanh,
    precision=GLOBAL_TF_FLOAT_PRECISION,
    stddev=1.0,
    bavg=0.0,
    name="linear",
    reuse=None,
    seed=None,
    use_timestep=False,
    trainable=True,
    useBN=False,
    uniform_seed=False,
    initial_variables=None,
    mixed_prec=None,
    final_layer=False,
):
    r"""Build one layer with continuous or quantized value.

    Its weight and bias can be initialized with random or constant values.
    """
    # USE FOR NEW FITTINGNET
    is_layer = (nvnmd_cfg.version == 1) and ("layer_0" in name)
    with tf.variable_scope(name, reuse=reuse):
        if is_layer:
            t = one_layer_t(
                None,
                outputs_size,
                bavg,
                stddev,
                precision,
                trainable,
                initial_variables,
                seed,
                uniform_seed,
                name,
            )
            #
            NTAVC = nvnmd_cfg.fitn["NTAVC"]
            nd = inputs.get_shape().as_list()[1] - NTAVC
            inputs2 = tf.slice(inputs, [0, nd], [-1, NTAVC])
            inputs = tf.slice(inputs, [0, 0], [-1, nd])
        # w & b
        shape = inputs.get_shape().as_list()
        w, b = one_layer_wb(
            shape,
            outputs_size,
            bavg,
            stddev,
            precision,
            trainable,
            initial_variables,
            seed,
            uniform_seed,
            name,
        )
        if nvnmd_cfg.quantize_fitting_net:
            NBIT_DATA_FL = nvnmd_cfg.nbit["NBIT_FIT_DATA_FL"]
            NBIT_SHORT_FL = nvnmd_cfg.nbit["NBIT_FIT_SHORT_FL"]
            # w
            with tf.variable_scope("w", reuse=reuse):
                w = op_module.quantize_nvnmd(w, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1)
                w = tf.ensure_shape(w, [shape[1], outputs_size])
            # b
            with tf.variable_scope("b", reuse=reuse):
                b = op_module.quantize_nvnmd(b, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1)
                b = tf.ensure_shape(b, [outputs_size])
            # x
            with tf.variable_scope("x", reuse=reuse):
                x = op_module.quantize_nvnmd(inputs, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1)
                inputs = tf.ensure_shape(x, [None, shape[1]])
            # wx
            # normalize weight mode: 0 all | 1 column
            norm_mode = 0 if final_layer else 1
            wx = op_module.matmul_fitnet_nvnmd(
                inputs, w, NBIT_DATA_FL, NBIT_SHORT_FL, norm_mode
            )
            with tf.variable_scope("wx", reuse=reuse):
                wx = op_module.quantize_nvnmd(
                    wx, 1, NBIT_DATA_FL, NBIT_DATA_FL - 2, -1
                )
                wx = tf.ensure_shape(wx, [None, outputs_size])
            if is_layer:
                wx2 = tf.matmul(inputs2, t)
                with tf.variable_scope("wx2", reuse=reuse):
                    wx2 = op_module.quantize_nvnmd(
                        wx2, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1
                    )
                    wx2 = tf.ensure_shape(wx2, [None, outputs_size])
                wx = wx + wx2
            # wxb
            wxb = wx + b
            with tf.variable_scope("wxb", reuse=reuse):
                wxb = op_module.quantize_nvnmd(wxb, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1)
                wxb = tf.ensure_shape(wxb, [None, outputs_size])
            # actfun
            if activation_fn is not None:
                # set activation function as tanh4
                y = op_module.tanh4_flt_nvnmd(wxb)
            else:
                y = wxb
            with tf.variable_scope("actfun", reuse=reuse):
                y = op_module.quantize_nvnmd(y, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1)
                y = tf.ensure_shape(y, [None, outputs_size])
        else:
            if is_layer:
                hidden = tf.matmul(inputs, w) + tf.matmul(inputs2, t) + b
            else:
                hidden = tf.matmul(inputs, w) + b
            # set activation function as tanh4
            y = tanh4(hidden) if (activation_fn is not None) else hidden
        # 'reshape' is necessary:
        # the next layer needs the shape of the input tensor to build its weight
        y = tf.reshape(y, [-1, outputs_size])
        return y
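

# Editor's note: a minimal usage sketch, not part of the original module. It
# assumes a TF1-style graph, that `nvnmd_cfg` has already been populated from a
# model checkpoint elsewhere, and purely hypothetical layer sizes and names.
#
#     x = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 240])
#     h = one_layer(x, 128, activation_fn=tf.nn.tanh, name="layer_0")
#     e = one_layer(h, 1, activation_fn=None, name="final_layer", final_layer=True)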