24 #ifndef TVM_TOPI_CUDA_DENSE_H_
25 #define TVM_TOPI_CUDA_DENSE_H_
56 ICHECK_EQ(data->shape.size(), 2) <<
"dense requires 2-D data";
57 ICHECK_EQ(weight->shape.size(), 2) <<
"dense requires 2-D weight";
59 ICHECK_EQ(bias->shape.size(), 1) <<
"dense requires 1-D bias";
62 auto batch = data->shape[0];
63 auto in_dim = data->shape[1];
64 auto out_dim = weight->shape[0];
66 if (target->GetLibs().count(
"cublas")) {
67 ICHECK_EQ(data->dtype, out_dtype) <<
"Mixed precision not supported.";
71 {batch, out_dim}, [&](
Var i,
Var j) {
return mm(i, j) + bias(j); },
"tensor",
kBroadcast);
89 if (target->kind->name ==
"cuda" && target->GetLibs().count(
"cublas")) {
100 auto num_thread = 64;
103 s[
dense].split(k, num_thread, &ko, &kf);
104 auto dense_f = s.rfactor(
dense, kf)[0];
107 if (detail::contains(s->outputs,
dense->op)) {
110 out = outs[0]->op.output(0);
120 s[
dense].bind(tx, thread_x);
121 s[dense_f].compute_at(s[
dense], tx);
122 s[
dense].set_store_predicate(
static_cast<PrimExpr>(thread_x) == 0);
123 s[out].set_store_predicate(
static_cast<PrimExpr>(thread_x) == 0);
130 if (!detail::contains(s->outputs, op)) {
131 s[op].compute_inline();
133 for (
auto tensor : op->InputTensors()) {
134 if (tensor->op->InputTensors().size() > 0) {
135 traverse(tensor->op);
138 }
else if (op->tag ==
"dense") {
140 auto dense = op.output(0);
143 LOG(ERROR) <<
"Unsupported operator " << op->tag;
147 traverse(outs[0]->op);
Utility functions for handling arrays.
Reference to PrimExprNode.
Definition: expr.h:115
Range container
Definition: expr.h:725
Managed reference class to TargetNode.
Definition: target.h:200
Array, container representing a contiguous sequence of ObjectRefs.
Definition: array.h:289
void push_back(const T &item)
push a new item to the back of the list
Definition: array.h:457
Runtime primitive data type.
Definition: data_type.h:43
bool defined() const
Definition: object.h:552
const ObjectType * as() const
Try to downcast the internal Object to a raw pointer of a corresponding type.
Definition: object.h:910
Array< IterVar > axis
IterVar on each axis.
Definition: operation.h:207
A Compute op that computes a tensor on a certain domain.
Definition: operation.h:226
Operation that produces tensors.
Definition: tensor.h:47
Global schedule container, for operations and all the operations they depend on. The schedule per Oper...
Definition: schedule.h:326
Tensor structure representing a possible input, or intermediate computation result.
Definition: tensor.h:102
Iteration Variable, represents an iteration over an integer interval.
Definition: var.h:315
a named variable in TIR
Definition: var.h:89
External function interface to cuBLAS libraries.
Schedule for extern followed by injective ops.
Generic function that can be specialized on a per-target basis.
Tensor expression language DSL.
Definition: extracted_task.h:33
Schedule create_schedule(Array< Operation > ops)
Create a schedule for an array of ops (and their dependencies).
Definition: schedule.h:702
IterVar thread_axis(Range dom, std::string tag)
Create a new IterVar that represents an axis in thread.
IterVar reduce_axis(Range dom, std::string name="rv")
Create a new IterVar for reduction operations.
Tensor compute(Array< PrimExpr > shape, FCompute fcompute, std::string name="tensor", std::string tag="", Map< String, ObjectRef > attrs={})
Construct a new tensor by computing over shape, using the computation rule: result_tensor[axis] = fco...
Tensor cublas_matmul(const Tensor &lhs, const Tensor &rhs, bool transa, bool transb)
Create an op that multiplies lhs and rhs with cuBLAS.
Definition: cublas.h:46
tvm::te::Tensor dense_cuda(const Target &target, const tvm::te::Tensor &data, const tvm::te::Tensor &weight, const tvm::te::Tensor &bias, const DataType &out_dtype)
Implementation of dense for CUDA backend.
Definition: dense.h:53
Schedule schedule_extern(const Target &target, const Array< Tensor > &outs)
Schedule an extern op followed by injective operations.
Definition: extern.h:48
tvm::te::Tensor dense(const tvm::te::Tensor &data, const tvm::te::Tensor &weight, const tvm::te::Tensor &bias, const DataType &out_dtype)
Creates an operation that calculates data * weight^T + bias.
Definition: dense.h:48
Schedule schedule_dense(const Target &target, const Array< Tensor > &outs)
Create a CUDA schedule for dense.
Definition: dense.h:88
constexpr auto kBroadcast
Definition: tags.h:36
bool is_broadcast(std::string tag)
Definition: tags.h:47
runtime implementation for LibTorch/TorchScript.
Definition: analyzer.h:36
Operation node can generate one or multiple Tensors.
Collection of Schedule pass functions.