24 #ifndef TVM_TOPI_ROCM_DENSE_H_
25 #define TVM_TOPI_ROCM_DENSE_H_
56 ICHECK_EQ(data->shape.size(), 2) <<
"dense requires 2-D data";
57 ICHECK_EQ(weight->shape.size(), 2) <<
"dense requires 2-D weight";
59 ICHECK_EQ(bias->shape.size(), 1) <<
"dense requires 1-D bias";
62 auto batch = data->shape[0];
63 auto in_dim = data->shape[1];
64 auto out_dim = weight->shape[0];
66 if (target->GetLibs().count(
"rocblas")) {
67 ICHECK_EQ(data->dtype, out_dtype) <<
"Mixed precision not supported.";
71 {batch, out_dim}, [&](
Var i,
Var j) {
return mm(i, j) + bias(j); },
"tensor",
kBroadcast);
89 if (target->kind->name ==
"rocm" && target->GetLibs().count(
"rocblas")) {
Utility functions for handling arrays.
Managed reference class to TargetNode.
Definition: target.h:200
Array, container representing a contiguous sequence of ObjectRefs.
Definition: array.h:289
Runtime primitive data type.
Definition: data_type.h:43
bool defined() const
Definition: object.h:552
Global schedule container for operations and all the operations they depend on. The schedule per Oper...
Definition: schedule.h:326
Tensor structure representing a possible input, or intermediate computation result.
Definition: tensor.h:102
A named variable in TIR.
Definition: var.h:89
CUDA schedule for dense operation.
Schedule for extern followed by injective ops.
Generic function that can be specialized on a per-target basis.
Tensor expression language DSL.
Definition: extracted_task.h:33
Tensor compute(Array< PrimExpr > shape, FCompute fcompute, std::string name="tensor", std::string tag="", Map< String, ObjectRef > attrs={})
Construct a new tensor by computing over shape, using the computation rule: result_tensor[axis] = fco...
Tensor rocblas_matmul(const Tensor &lhs, const Tensor &rhs, bool transa, bool transb)
Create an op that multiplies lhs and rhs with rocBLAS.
Definition: rocblas.h:45
Schedule schedule_dense(const Target &target, const Array< Tensor > &outs)
Create a CUDA schedule for dense.
Definition: dense.h:88
Schedule schedule_extern(const Target &target, const Array< Tensor > &outs)
Schedule an extern op followed by injective operations.
Definition: extern.h:48
tvm::te::Tensor dense(const tvm::te::Tensor &data, const tvm::te::Tensor &weight, const tvm::te::Tensor &bias, const DataType &out_dtype)
Creates an operation that calculates data * weight^T + bias.
Definition: dense.h:48
tvm::te::Tensor dense_rocm(const Target &target, const tvm::te::Tensor &data, const tvm::te::Tensor &weight, const tvm::te::Tensor &bias, const DataType &out_dtype)
Implementation of dense for rocm backend.
Definition: dense.h:53
Schedule schedule_dense(const Target &target, const Array< Tensor > &outs)
Create a rocm schedule for dense.
Definition: dense.h:88
constexpr auto kBroadcast
Definition: tags.h:36
runtime implementation for LibTorch/TorchScript.
Definition: analyzer.h:36
Operation node can generate one or multiple Tensors.