#ifndef TVM_TOPI_CUDA_POOLING_H_
#define TVM_TOPI_CUDA_POOLING_H_

#include <tvm/target/generic_func.h>
#include <tvm/te/operation.h>
#include <tvm/te/schedule_pass.h>
#include <tvm/topi/detail/array_utils.h>
#include <tvm/topi/detail/fuse.h>
#include <tvm/topi/tags.h>

#include <functional>

namespace tvm {
namespace topi {

using namespace tvm::te;

namespace cuda {
/*! \brief Create a CUDA schedule for pool. */
inline Schedule schedule_pool(const Target& target, const Array<Tensor>& outs) {
  Array<Operation> out_ops;
  for (auto t : outs) out_ops.push_back(t->op);
  auto s = create_schedule(out_ops);

  auto _schedule = [&](const Tensor& padded_input, const Tensor& pool) {
    // Inline the padding stage when it is a compute op.
    if (padded_input->op->IsInstance<ComputeOpNode>()) {
      s[padded_input].compute_inline();
    }
    int num_thread = target->GetAttr<Integer>("max_num_threads").value().IntValue();
    Tensor out, OL;
    if (detail::contains(s->outputs, pool->op)) {
      out = pool;
      OL = s.cache_write(pool, "local");
    } else {
      out = outs[0]->op.output(0);
      s[pool].set_scope("local");
    }
    // Fuse all output axes, then split across thread blocks and threads.
    auto fused = detail::Fuse(s[out], s[out]->op.as<ComputeOpNode>()->axis);
    IterVar bx, tx;
    s[out].split(fused, num_thread, &bx, &tx);
    s[out].bind(bx, thread_axis(Range(), "blockIdx.x"));
    s[out].bind(tx, thread_axis(Range(), "threadIdx.x"));
    if (detail::contains(s->outputs, pool->op)) {
      s[OL].compute_at(s[out], tx);
    } else {
      s[pool].compute_at(s[out], tx);
    }
  };

  std::function<void(Operation)> traverse;
  traverse = [&](const Operation& op) {
    // Inline all one-to-one-mapping operators except the last stage (output).
    if (is_broadcast(op->tag)) {
      if (!detail::contains(s->outputs, op)) {
        s[op].compute_inline();
      }
      for (auto tensor : op->InputTensors()) {
        if (tensor->op->InputTensors().size() > 0) {
          traverse(tensor->op);
        }
      }
    } else if (op->tag.rfind("pool", 0) == 0) {
      // The tag starts with "pool": schedule the pool stage and its padded input.
      auto padded_input = op->InputTensors()[0];
      auto pool = op.output(0);
      _schedule(padded_input, pool);
    } else {
      LOG(ERROR) << "Unsupported operator " << op->tag;
    }
  };

  traverse(outs[0]->op);
  return s;
}

/*! \brief Create a CUDA schedule for global_pool. */
inline Schedule schedule_global_pool(const Target& target, const Array<Tensor>& outs) {
  Array<Operation> out_ops;
  for (auto t : outs) out_ops.push_back(t->op);
  auto s = create_schedule(out_ops);

  auto _schedule = [&](const Tensor& pool) {
    auto num_thread = 8;
    auto block_x = thread_axis(Range(), "blockIdx.x");
    auto block_y = thread_axis(Range(), "blockIdx.y");
    auto thread_x = thread_axis(Range(0, num_thread), "threadIdx.x");
    auto thread_y = thread_axis(Range(0, num_thread), "threadIdx.y");
    Tensor out, OL;
    if (detail::contains(s->outputs, pool->op)) {
      out = pool;
      OL = s.cache_write(pool, "local");
    } else {
      out = outs[0]->op.output(0);
      s[pool].set_scope("local");
    }
    // Split the first two output axes and bind them to a 2D grid of thread blocks.
    auto i = s[out]->op.as<ComputeOpNode>()->axis[0];
    auto c = s[out]->op.as<ComputeOpNode>()->axis[1];
    IterVar by, ty;
    s[out].split(i, num_thread, &by, &ty);
    IterVar bx, tx;
    s[out].split(c, num_thread, &bx, &tx);
    s[out].reorder({by, bx, ty, tx});
    s[out].bind(ty, thread_y);
    s[out].bind(tx, thread_x);
    s[out].bind(by, block_y);
    s[out].bind(bx, block_x);
    if (detail::contains(s->outputs, pool->op)) {
      s[OL].compute_at(s[out], tx);
    } else {
      s[pool].compute_at(s[out], tx);
    }
  };

  std::function<void(Operation)> traverse;
  traverse = [&](const Operation& op) {
    // Inline all one-to-one-mapping operators except the last stage (output).
    if (is_broadcast(op->tag)) {
      if (!detail::contains(s->outputs, op)) {
        s[op].compute_inline();
      }
      for (auto tensor : op->InputTensors()) {
        if (tensor->op->InputTensors().size() > 0) {
          traverse(tensor->op);
        }
      }
    } else if (op->tag.rfind("global_pool", 0) == 0) {
      // The tag starts with "global_pool": schedule the global pooling stage.
      auto pool = op.output(0);
      _schedule(pool);
    } else {
      LOG(ERROR) << "Unsupported operator " << op->tag;
    }
  };

  traverse(outs[0]->op);
  return s;
}

}  // namespace cuda
}  // namespace topi
}  // namespace tvm

#endif  // TVM_TOPI_CUDA_POOLING_H_
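A minimal usage sketch follows, assuming a max-pool stage written directly with te::compute and tagged "pool_max" so that the traversal above recognizes it; the function name, shapes, and the hand-written reduction are hypothetical stand-ins for what topi's pooling helpers would normally produce.

#include <tvm/te/operation.h>
#include <tvm/tir/op.h>
#include <tvm/topi/cuda/pooling.h>

// Illustrative only: builds a small NCHW max-pool compute and schedules it for CUDA.
inline tvm::te::Schedule ExamplePoolSchedule() {
  using namespace tvm;
  using namespace tvm::te;
  // 1x16x32x32 input feature map.
  Tensor data = placeholder({1, 16, 32, 32}, DataType::Float(32), "data");
  // 2x2 max pooling with stride 2; the "pool_max" tag starts with "pool",
  // which is what schedule_pool's traverse() checks for.
  IterVar rh = reduce_axis(Range(0, 2), "rh");
  IterVar rw = reduce_axis(Range(0, 2), "rw");
  Tensor pool = compute(
      {1, 16, 16, 16},
      [&](Var n, Var c, Var h, Var w) {
        return max(data(n, c, h * 2 + rh, w * 2 + rw), {rh, rw});
      },
      "pool", "pool_max");
  Target target("cuda");
  return topi::cuda::schedule_pool(target, {pool});
}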