24 #ifndef TVM_TOPI_NN_RMS_NORM_H_
25 #define TVM_TOPI_NN_RMS_NORM_H_
51 double epsilon, std::string name =
"T_rms_norm",
53 const auto& data_type = data->dtype;
54 const auto& weight_type = weight.defined() ? weight->dtype : data_type;
55 ICHECK(data_type == weight_type) <<
"rms_norm: data and weight must have the same type";
60 auto square =
multiply(data_fp32, data_fp32);
61 auto square_sum =
sum(square, axis,
false,
true);
63 auto ndim = data_fp32->shape.size();
64 ICHECK_NE(ndim, 0) <<
"Cannot reduce a 0 dim Tensor";
65 auto real_axis =
GetRealAxis(
static_cast<int>(ndim), axis);
66 auto reduce_extent =
make_const(data_fp32->dtype, 1);
67 for (
int i : real_axis) {
68 reduce_extent *= data_fp32->shape[i];
70 auto rsqrt_func = [&](
const Array<Var>& indices) {
71 Array<Var> non_reduce_indices;
72 for (
int i = 0, n =
static_cast<int>(indices.size()); i < n; ++i) {
73 if (std::find(real_axis.begin(), real_axis.end(), i) == real_axis.end()) {
74 non_reduce_indices.push_back(indices[i]);
81 auto rsqrt_shape = Array<PrimExpr>();
82 for (
int i = 0, n =
static_cast<int>(data_fp32->shape.size()); i < n; ++i) {
83 if (std::find(real_axis.begin(), real_axis.end(), i) == real_axis.end()) {
84 rsqrt_shape.push_back(data_fp32->shape[i]);
89 auto rms_norm_func = [&](
const Array<Var>& indices) {
90 Array<Var> reduce_indices, non_reduce_indices;
91 for (
int i = 0, n =
static_cast<int>(indices.size()); i < n; ++i) {
92 if (std::find(real_axis.begin(), real_axis.end(), i) != real_axis.end()) {
93 reduce_indices.push_back(indices[i]);
95 non_reduce_indices.push_back(indices[i]);
98 auto output =
rsqrt(non_reduce_indices) * data_fp32(indices) * weight_fp32(reduce_indices);
static DataType Float(int bits, int lanes=1)
Construct a float type.
Definition: data_type.h:291
Tensor structure representing a possible input, or intermediate computation result.
Definition: tensor.h:100
Tensor expression language DSL.
Definition: extracted_task.h:33
Tensor compute(Array< PrimExpr > shape, FCompute fcompute, std::string name="tensor", std::string tag="", Map< String, ffi::Any > attrs={})
Construct a new tensor by computing over shape, using the computation rule: result_tensor[axis] = fcompute(axis).
PrimExpr make_const(DataType t, ValueType value, Span span=Span())
Make a const value with certain data type.
Definition: op.h:980
Tensor rms_norm(const Tensor &data, const Tensor &weight, const Array< Integer > &axis, double epsilon, std::string name="T_rms_norm", std::string tag=kInjective)
Root mean square normalization.
Definition: rms_norm.h:50
Tensor sum(const Tensor &data, const Optional< Array< Integer >> &axis, bool keepdims=false, bool atleast1d=false)
Creates an operation that sums array elements over a given axis.
Definition: reduction.h:326
constexpr auto kInjective
Definition: tags.h:33
tvm::PrimExpr multiply(const tvm::PrimExpr &a, const tvm::PrimExpr &b)
Definition: broadcast.h:225
Tensor cast(const Tensor &x, DataType type, std::string name="T_cast", std::string tag=kElementWise)
Cast each element of x to the given type. If expr is scalar and type is a corresponding vector type,...
Definition: elemwise.h:281
Tensor rsqrt(const Tensor &x, std::string name="tensor", std::string tag=kElementWise)
Creates an operation that returns rsqrt of a given tensor.
Definition: elemwise.h:235
std::vector< int > GetRealAxis(int ndim, const Optional< Array< Integer >> &axis)
Convert a reduction axis which could be empty or have negative elements into a real axis with valid d...
Definition: reduction.h:65
Performance counters for profiling via the PAPI library.
Definition: analyzer.h:37
PrimExpr rsqrt(PrimExpr x, Span span=Span())
Definition: op.h:731
Operation node can generate one or multiple Tensors.
Reduction op constructors.