softmax.h
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

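/*!
 * \file cuda/softmax.h
 * \brief CUDA schedule for softmax operation
 */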
#ifndef TVM_TOPI_CUDA_SOFTMAX_H_
#define TVM_TOPI_CUDA_SOFTMAX_H_

#include <tvm/target/target.h>
#include <tvm/te/operation.h>
#include <tvm/te/schedule_pass.h>
#include <tvm/topi/detail/fuse.h>
#include <tvm/topi/tags.h>

namespace tvm {
namespace topi {

using namespace tvm::te;

namespace cuda {

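/*!
 * \brief Create a CUDA schedule for the given softmax output tensors.
 *
 * \param target The target to generate a schedule for.
 * \param outs The output tensors.
 *
 * \return A schedule for the given ops.
 */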
inline Schedule schedule_softmax(const Target& target, const Array<Tensor>& outs) {
  Array<Operation> out_ops;
  for (auto t : outs) {
    out_ops.push_back(t->op);
  }
  auto s = create_schedule(out_ops);

  auto softmax = outs[0];
  tvm::te::Tensor max_elem;
  tvm::te::Tensor expsum;
  tvm::te::Tensor exp;
  bool has_exp = false;

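  // Recover the intermediate stages from the producer tag of the output:
  // softmax keeps exp(x - max) and the per-row sum as separate input
  // tensors, while log-softmax consumes max_elem and expsum directly.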
  auto tag = softmax->op.as<ComputeOpNode>()->tag;
  if (tag == "softmax_output") {
    expsum = softmax->op->InputTensors()[1];
    exp = softmax->op->InputTensors()[0];
    max_elem = s[exp]->op->InputTensors()[1];
    has_exp = true;
  } else if (tag == "log_softmax_output") {
    max_elem = softmax->op->InputTensors()[1];
    expsum = softmax->op->InputTensors()[2];
  } else {
    LOG(ERROR) << "Tag is expected to be softmax_output or log_softmax_output. Got " << tag;
  }

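  // Launch configuration: one thread block per outer (batch) row, with
  // num_thread threads cooperating along the softmax axis.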
  int num_thread = 64;
  auto block_x = tvm::te::thread_axis(Range(), "blockIdx.x");
  auto thread_x = tvm::te::thread_axis(Range(0, num_thread), "threadIdx.x");

  if (has_exp) {
    s[exp].bind(exp->op.as<ComputeOpNode>()->axis[0], block_x);
  }

  s[max_elem].bind(max_elem->op.as<ComputeOpNode>()->axis[0], block_x);

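  // Cross-thread reduction of the exp-sum: split the reduction axis by
  // num_thread and rfactor it so each thread accumulates a partial sum;
  // the store predicate lets only thread 0 write the final value.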
  auto k = expsum->op.as<ComputeOpNode>()->reduce_axis[0];
  IterVar ko, ki;
  s[expsum].split(k, num_thread, &ko, &ki);
  auto EF = s.rfactor(expsum, ki)[0];
  s[expsum].bind(s[expsum]->op.as<ComputeOpNode>()->axis[0], block_x);
  s[expsum].bind(s[expsum]->op.as<ComputeOpNode>()->reduce_axis[0], thread_x);
  s[EF].compute_at(s[expsum], s[expsum]->op.as<ComputeOpNode>()->reduce_axis[0]);
  s[expsum].set_store_predicate(thread_x->var == 0);

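  // Finally, distribute the normalization step over the same threads by
  // splitting the second axis of the output into num_thread parts.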
  IterVar tx, xi;
  s[softmax].split_by_nparts(softmax->op.as<ComputeOpNode>()->axis[1], num_thread, &tx, &xi);
  s[softmax].bind(tx, thread_x);

  return s;
}

}  // namespace cuda
}  // namespace topi
}  // namespace tvm
#endif  // TVM_TOPI_CUDA_SOFTMAX_H_
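
For reference, here is a minimal sketch of how this schedule might be driven from C++. It is not part of the header: the input shape, the function name MakeCudaSoftmaxSchedule, and the use of topi::nn::softmax from <tvm/topi/nn/softmax.h> are illustrative assumptions. The point is that the compute must carry the "softmax_output" (or "log_softmax_output") tag that schedule_softmax dispatches on, which topi's softmax helper sets by default.

// Sketch only: names and shapes below are assumptions, not part of this header.
#include <tvm/te/operation.h>
#include <tvm/topi/cuda/softmax.h>
#include <tvm/topi/nn/softmax.h>

tvm::te::Schedule MakeCudaSoftmaxSchedule() {
  // 2-D input; softmax runs along the last axis, so axis 0 is the batch
  // dimension that the schedule binds to blockIdx.x.
  auto x = tvm::te::placeholder({128, 1024}, tvm::DataType::Float(32), "x");
  auto y = tvm::topi::nn::softmax(x, /*axis=*/-1);  // tagged "softmax_output"
  return tvm::topi::cuda::schedule_softmax(tvm::Target("cuda"), {y});
}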