24 #ifndef TVM_TOPI_ROCM_DENSE_H_    25 #define TVM_TOPI_ROCM_DENSE_H_    56   ICHECK_EQ(data->shape.size(), 2) << 
"dense requires 2-D data";
    57   ICHECK_EQ(weight->shape.size(), 2) << 
"dense requires 2-D weight";
    59     ICHECK_EQ(bias->shape.size(), 1) << 
"dense requires 1-D bias";
    62   auto batch = data->shape[0];
    63   auto in_dim = data->shape[1];
    64   auto out_dim = weight->shape[0];
    66   if (target->GetLibs().count(
"rocblas")) {
    67     ICHECK_EQ(data->dtype, out_dtype) << 
"Mixed precision not supported.";
    71           {batch, out_dim}, [&](
Var i, 
Var j) { 
return mm(i, j) + bias(j); }, 
"tensor", 
kBroadcast);
    89   if (target->kind->name == 
"rocm" && target->GetLibs().count(
"rocblas")) {
    99 #endif  // TVM_TOPI_ROCM_DENSE_H_ tvm::te::Tensor dense_rocm(const Target &target, const tvm::te::Tensor &data, const tvm::te::Tensor &weight, const tvm::te::Tensor &bias, const DataType &out_dtype)
Implementation of dense for rocm backend. 
Definition: dense.h:53
Schedule for extern followed by injective ops. 
Global schedule container for operations and all the operations they depend on. The schedule per Oper...
Definition: schedule.h:317
Schedule schedule_dense(const Target &target, const Array< Tensor > &outs)
Create a rocm schedule for dense. 
Definition: dense.h:88
Utility functions for handling arrays. 
runtime implementation for LibTorch/TorchScript. 
Definition: analyzer.h:36
Tensor expression language DSL. 
Definition: extracted_task.h:33
a named variable in TIR 
Definition: var.h:88
constexpr auto kBroadcast
Definition: tags.h:36
tvm::te::Tensor dense(const tvm::te::Tensor &data, const tvm::te::Tensor &weight, const tvm::te::Tensor &bias, const DataType &out_dtype)
Creates an operation that calculates data * weight^T + bias. 
Definition: dense.h:48
bool defined() const
Definition: object.h:544
Runtime primitive data type. 
Definition: data_type.h:41
Array, container representing a contiguous sequence of ObjectRefs. 
Definition: array.h:289
Schedule schedule_dense(const Target &target, const Array< Tensor > &outs)
Create a CUDA schedule for dense. 
Definition: dense.h:88
Managed reference class to TargetNode. 
Definition: target.h:183
Tensor structure representing a possible input, or intermediate computation result. 
Definition: tensor.h:102
Operation node can generate one or multiple Tensors. 
Tensor compute(Array< PrimExpr > shape, FCompute fcompute, std::string name="tensor", std::string tag="", Map< String, ObjectRef > attrs={})
Construct a new tensor by computing over shape, using the computation rule: result_tensor[axis] = fco...
CUDA schedule for dense operation. 
Tensor rocblas_matmul(const Tensor &lhs, const Tensor &rhs, bool transa, bool transb)
Create an op that multiplies lhs and rhs with rocBLAS. 
Definition: rocblas.h:45
Schedule schedule_extern(const Target &target, const Array< Tensor > &outs)
Schedule an extern op followed by injective operations. 
Definition: extern.h:48
Generic function that can be specialized on a per target basis.