/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file tvm/topi/nn/pooling.h
 * \brief Pooling op constructors
 */
#ifndef TVM_TOPI_NN_POOLING_H_
#define TVM_TOPI_NN_POOLING_H_

#include <tvm/arith/analyzer.h>
#include <tvm/topi/detail/ravel_unravel.h>
#include <tvm/topi/nn.h>
#include <tvm/topi/reduction.h>
#include <tvm/topi/tags.h>

#include <algorithm>
#include <string>
#include <vector>

namespace tvm {
namespace topi {
namespace nn {

using namespace tvm::te;

/*! \brief Pooling type */
enum PoolType : int {
  kAvgPool,
  kMaxPool,
};

inline Tensor pool_grad_impl(const Tensor& out_grad, const Tensor& x,
                             const ffi::Array<PrimExpr>& kernel_size,
                             const ffi::Array<PrimExpr>& stride_size,
                             const ffi::Array<PrimExpr>& padding_size, PoolType pool_type,
                             bool ceil_mode, const size_t height_axis, const size_t width_axis,
                             bool count_include_pad) {
  ICHECK(out_grad->shape.size() >= 2) << "Pooling grad output must be at least 2-D (H, W)";
  ICHECK(x->shape.size() >= 2) << "Pooling input must be at least 2-D (H, W)";
  ICHECK_EQ(kernel_size.size(), 2) << "Pooling kernel_size must have 2 elements";
  ICHECK_EQ(stride_size.size(), 2) << "Pooling stride_size must have 2 elements";
  ICHECK_EQ(padding_size.size(), 4) << "Pooling padding_size must have 4 elements";

  auto kernel_height = kernel_size[0];
  auto kernel_width = kernel_size[1];
  auto stride_height = stride_size[0];
  auto stride_width = stride_size[1];

  auto height = x->shape[height_axis];
  auto width = x->shape[width_axis];

  auto pad_top = padding_size[0];
  auto pad_left = padding_size[1];
  auto pad_bottom = padding_size[2];
  auto pad_right = padding_size[3];

  if (ceil_mode) {
    // Additional padding to ensure we do ceil instead of floor when
    // dividing by stride.
    pad_bottom += stride_height - 1;
    pad_right += stride_width - 1;
  }
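  // A worked example of the trick above (illustrative, not from the original
  // source): with height = 5, kernel = 2, stride = 2 and zero padding, floor
  // mode yields (5 - 2) / 2 + 1 = 2 output rows, whereas ceil mode should yield
  // ceil((5 - 2) / 2) + 1 = 3. Adding stride - 1 = 1 to pad_bottom turns the
  // floor division below into the required ceiling: (5 - 2 + 1) / 2 + 1 = 3.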

  ffi::Array<PrimExpr> pad_before(std::vector<PrimExpr>(x->shape.size(), 0));
  pad_before.Set(height_axis, pad_top);
  pad_before.Set(width_axis, pad_left);

  ffi::Array<PrimExpr> pad_after(std::vector<PrimExpr>(x->shape.size(), 0));
  pad_after.Set(height_axis, pad_bottom);
  pad_after.Set(width_axis, pad_right);
  arith::Analyzer analyzer;
  auto out_height =
      analyzer.Simplify((height - kernel_height + pad_top + pad_bottom) / stride_height + 1);
  auto out_width =
      analyzer.Simplify((width - kernel_width + pad_left + pad_right) / stride_width + 1);

  auto dheight = tvm::te::reduce_axis(Range(0, kernel_height), "dh");
  auto dwidth = tvm::te::reduce_axis(Range(0, kernel_width), "dw");

  ffi::Array<PrimExpr> data_shape = x->shape;
  ffi::Array<PrimExpr> out_shape = data_shape;
  out_shape.Set(height_axis, out_height);
  out_shape.Set(width_axis, out_width);

  const int64_t* padding_h0 = as_const_int(pad_top);
  const int64_t* padding_w0 = as_const_int(pad_left);
  const int64_t* padding_h1 = as_const_int(pad_bottom);
  const int64_t* padding_w1 = as_const_int(pad_right);
  const bool do_pad = ((padding_h0 && *padding_h0) || (padding_w0 && *padding_w0)) ||
                      ((padding_h1 && *padding_h1) || (padding_w1 && *padding_w1));

  if (pool_type == kMaxPool) {
    ffi::Array<PrimExpr> ravel_shape{data_shape.begin(), data_shape.end()};
    ravel_shape.Set(height_axis, ravel_shape[height_axis] + pad_top + pad_bottom);
    ravel_shape.Set(width_axis, ravel_shape[width_axis] + pad_left + pad_right);

    auto windowh =
        tvm::te::reduce_axis(Range(0, (kernel_height + stride_height - 1) / stride_height), "wh");
    auto windoww =
        tvm::te::reduce_axis(Range(0, (kernel_width + stride_width - 1) / stride_width), "ww");

    auto argmax = MakeArgmaxReducer();
    auto pad_x = do_pad ? pad(x, pad_before, pad_after, tvm::min_value(x->dtype), "pad_temp") : x;

    auto mp_argmax = tvm::te::compute(
        out_shape,
        [&](const ffi::Array<Var>& inds) {
          ffi::Array<PrimExpr> window_inds{inds.begin(), inds.end()};
          window_inds.Set(height_axis, inds[height_axis] * stride_height + dheight);
          window_inds.Set(width_axis, inds[width_axis] * stride_width + dwidth);
          auto idx = detail::RavelIndex(window_inds, ravel_shape);
          return argmax({idx, pad_x(window_inds)}, {dheight, dwidth}, nullptr);
        },
        "maxpool_grad_argmax", kCommReduceIdx);

    auto mp_inds = mp_argmax[0];

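    // For every output element, mp_inds holds the raveled (flattened) index of
    // the input element that produced the max; the gradient pass below routes
    // out_grad back by comparing these raveled indices.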
    return tvm::te::compute(
        data_shape,
        [&](const ffi::Array<Var>& inds) {
          ffi::Array<PrimExpr> pad_inds{inds.begin(), inds.end()};
          pad_inds.Set(height_axis, pad_inds[height_axis] + pad_top);
          pad_inds.Set(width_axis, pad_inds[width_axis] + pad_left);
          auto idx = detail::RavelIndex(pad_inds, ravel_shape);

          ffi::Array<PrimExpr> out_idx{inds.begin(), inds.end()};
          out_idx.Set(height_axis, (inds[height_axis] + pad_top) / stride_height - windowh);
          out_idx.Set(width_axis, (inds[width_axis] + pad_left) / stride_width - windoww);

          PrimExpr out_idx_lower_h = tir::Select(
              pad_inds[height_axis] < kernel_height, make_const(pad_inds[height_axis].dtype(), 0),
              (pad_inds[height_axis] - kernel_height) / stride_height + 1);
          PrimExpr out_idx_lower_w = tir::Select(
              pad_inds[width_axis] < kernel_width, make_const(pad_inds[width_axis].dtype(), 0),
              (pad_inds[width_axis] - kernel_width) / stride_width + 1);

          return tvm::sum(
              tvm::if_then_else(tir::And(tir::And(out_idx[height_axis] >= out_idx_lower_h,
                                                  out_idx[width_axis] >= out_idx_lower_w),
                                         mp_inds(out_idx) == idx),
                                out_grad(out_idx), make_const(x->dtype, 0)),
              {windowh, windoww});
        },
        "T_pool_grad", "pool_grad_max");
  } else if (pool_type == kAvgPool) {
    auto windowh =
        tvm::te::reduce_axis(Range(0, (kernel_height + stride_height - 1) / stride_height), "wh");
    auto windoww =
        tvm::te::reduce_axis(Range(0, (kernel_width + stride_width - 1) / stride_width), "ww");
    return tvm::te::compute(
        data_shape,
        [&](const ffi::Array<Var>& inds) {
          PrimExpr pad_h_idx = inds[height_axis] + pad_top;
          PrimExpr pad_w_idx = inds[width_axis] + pad_left;

          // Output indices whose pooling windows cover the current input element
          // (these may be out of bounds).
          ffi::Array<PrimExpr> out_idx{inds.begin(), inds.end()};
          out_idx.Set(height_axis, (pad_h_idx / stride_height - windowh));
          out_idx.Set(width_axis, (pad_w_idx / stride_width - windoww));

          PrimExpr out_idx_lower_h =
              tir::Select(pad_h_idx < kernel_height, make_const(pad_h_idx.dtype(), 0),
                          (pad_h_idx - kernel_height) / stride_height + 1);
          PrimExpr out_idx_lower_w =
              tir::Select(pad_w_idx < kernel_width, make_const(pad_w_idx.dtype(), 0),
                          (pad_w_idx - kernel_width) / stride_width + 1);

          PrimExpr divide_factor;  // number of pooled elements
          if (count_include_pad) {
            divide_factor = kernel_height * kernel_width;
          } else {
            PrimExpr h_start = out_idx[height_axis] * stride_height - pad_top;
            PrimExpr w_start = out_idx[width_axis] * stride_width - pad_left;

            PrimExpr h_end = min(h_start + kernel_height, height);
            PrimExpr w_end = min(w_start + kernel_width, width);
            h_start = max(h_start, make_const(h_start.dtype(), 0));
            w_start = max(w_start, make_const(w_start.dtype(), 0));
            divide_factor =
                max((h_end - h_start) * (w_end - w_start), make_const(h_end.dtype(), 1));
          }
          return tvm::sum(
              tvm::if_then_else(tir::And(tir::And(out_idx[height_axis] >= out_idx_lower_h,
                                                  out_idx[height_axis] < out_height),
                                         tir::And(out_idx[width_axis] >= out_idx_lower_w,
                                                  out_idx[width_axis] < out_width)),
                                out_grad(out_idx) / divide_factor, make_const(out_grad->dtype, 0)),
              {windowh, windoww});
        },
        "T_pool_grad", "pool_grad_avg");
  } else {
    LOG(ERROR) << "Unrecognized pool_type: " << pool_type;
    return Tensor();
  }
}

/*!
 * \brief Find the index of the Depth, Height, or Width dimension in a layout string.
 */
inline bool find_depth_height_width(const std::string& layout, int* depth_axis, int* height_axis,
                                    int* width_axis) {
  if (depth_axis) *depth_axis = -1;
  if (height_axis) *height_axis = -1;
  if (width_axis) *width_axis = -1;
  int curr_idx = 0;
  for (size_t i = 0; i < layout.size(); ++i) {
    if ((layout[i] >= 'A' && layout[i] <= 'Z') || (layout[i] >= 'a' && layout[i] <= 'z')) {
      if (layout[i] == 'D' && depth_axis) {
        if (*depth_axis != -1) return false;
        *depth_axis = curr_idx;
      } else if (layout[i] == 'H' && height_axis) {
        if (*height_axis != -1) return false;
        *height_axis = curr_idx;
      } else if (layout[i] == 'W' && width_axis) {
        if (*width_axis != -1) return false;
        *width_axis = curr_idx;
      } else if (layout[i] == 'd' || layout[i] == 'h' || layout[i] == 'w') {
        // Splitting the depth, height, or width axis (e.g., NCHW16w) is not supported.
        return false;
      }
      ++curr_idx;
    }
  }
  if ((depth_axis && *depth_axis == -1) || (height_axis && *height_axis == -1) ||
      (width_axis && *width_axis == -1))
    return false;
  return true;
}

inline bool find_height_width(const std::string& layout, int* height_axis, int* width_axis) {
  return find_depth_height_width(layout, /*depth_axis=*/nullptr, height_axis, width_axis);
}

inline bool find_width(const std::string& layout, int* width_axis) {
  return find_depth_height_width(layout, /*depth_axis=*/nullptr, /*height_axis=*/nullptr,
                                 width_axis);
}
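
// A minimal usage sketch for the three layout helpers above (illustrative, not
// part of the original header):
//
//   int d = -1, h = -1, w = -1;
//   find_depth_height_width("NCDHW", &d, &h, &w);  // true: d == 2, h == 3, w == 4
//   find_height_width("NHWC", &h, &w);             // true: h == 1, w == 2
//   find_width("NWC", &w);                         // true: w == 1
//   find_height_width("NCHW16w", &h, &w);          // false: split 'w' axis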

/*!
 * \brief Calculate the gradient of pooling on the height and width dimensions of data.
 *        The height and width axes are determined by the layout string.
 */
inline Tensor pool_grad(const Tensor& out_grad, const Tensor& x,
                        const ffi::Array<PrimExpr>& kernel_size,
                        const ffi::Array<PrimExpr>& stride_size,
                        const ffi::Array<PrimExpr>& padding_size, PoolType pool_type,
                        bool ceil_mode, const std::string& layout = "NCHW",
                        bool count_include_pad = true) {
  int height_axis = -1, width_axis = -1;
  ICHECK(find_height_width(layout, &height_axis, &width_axis)) << "Unsupported layout " << layout;
  return pool_grad_impl(out_grad, x, kernel_size, stride_size, padding_size, pool_type, ceil_mode,
                        height_axis, width_axis, count_include_pad);
}
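
// Example (illustrative sketch, not from the original file): the gradient of a
// 2x2 max pool with stride 2 over an NCHW tensor; the placeholder shapes here
// are arbitrary.
//
//   te::Tensor x = te::placeholder({1, 3, 32, 32}, DataType::Float(32), "x");
//   te::Tensor dy = te::placeholder({1, 3, 16, 16}, DataType::Float(32), "dy");
//   te::Tensor dx = pool_grad(dy, x, {2, 2}, {2, 2}, {0, 0, 0, 0}, kMaxPool,
//                             /*ceil_mode=*/false, "NCHW");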

inline PrimExpr start_index(const Var& out_index, const PrimExpr& odim, const PrimExpr& idim) {
  return indexdiv(out_index * idim, odim);
}

inline PrimExpr end_index(const Var& out_index, const PrimExpr& odim, const PrimExpr& idim) {
  PrimExpr tmp = indexdiv((out_index + 1) * idim, odim);
  return tvm::tir::Select(indexmod((out_index + 1) * idim, odim) == 0, tmp, tmp + 1);
}
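
// A worked example of the two index helpers above (illustrative): for idim = 10
// input elements and odim = 3 output elements, start_index/end_index produce the
// windows [0, 4), [3, 7), [6, 10) for out_index = 0, 1, 2, so every input
// element is covered and adjacent windows differ in size by at most one.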

/*!
 * \brief Perform adaptive pooling on N-dimensional data.
 */
inline Tensor adaptive_pool_impl(const Tensor& x, const ffi::Array<PrimExpr>& output_size,
                                 PoolType pool_type, const std::vector<int>& axes) {
  const auto n_dim = output_size.size();
  ICHECK_EQ(axes.size(), n_dim) << "The number of axes must equal the number of output dimensions";

  ffi::Array<PrimExpr> data_shape = x->shape;
  ffi::Array<PrimExpr> out_shape = data_shape;
  ffi::Array<PrimExpr> in_size, out_size;
  for (size_t i = 0; i < n_dim; ++i) {
    in_size.push_back(data_shape[axes[i]]);
    out_size.push_back(output_size[i]);
    out_shape.Set(axes[i], out_size[i]);
  }

  auto get_iter_vars = [=](const ffi::Array<Var>& output, bool reduce_indices) {
    ffi::Array<PrimExpr> indices;
    for (size_t i = 0; i < output.size(); ++i) indices.push_back(output[i]);
    ffi::Array<tir::IterVar> reduce_axes;
    for (size_t i = 0; i < n_dim; ++i) {
      auto i_start = start_index(output[axes[i]], out_size[i], in_size[i]);
      auto i_end = end_index(output[axes[i]], out_size[i], in_size[i]);
      auto rv_name = "rv" + std::to_string(i);
      auto rv_axis = tvm::te::reduce_axis(Range(0, i_end - i_start), rv_name);
      reduce_axes.push_back(rv_axis);
      if (reduce_indices) {
        indices.Set(axes[i], i_start + rv_axis);
      }
    }
    return std::make_tuple(indices, reduce_axes);
  };

  ffi::Map<ffi::String, ffi::Any> attrs;
  if (pool_type == kMaxPool) {
    attrs.Set("schedule_rule", tvm::ffi::String("meta_schedule.adaptive_pool_max"));
    return tvm::te::compute(
        out_shape,
        [&](const ffi::Array<Var>& output) {
          ffi::Array<PrimExpr> indices;
          ffi::Array<tir::IterVar> reduce_axes;
          std::tie(indices, reduce_axes) = get_iter_vars(output, true);
          return tvm::max(x(indices), reduce_axes);  // NOLINT(*)
        },
        "adaptive_pool_max", "adaptive_pool_max", attrs);
  } else if (pool_type == kAvgPool) {
    attrs.Set("schedule_rule", tvm::ffi::String("meta_schedule.adaptive_pool_avg"));
    auto pool_sum = tvm::te::compute(
        out_shape,
        [&](const ffi::Array<Var>& output) {
          ffi::Array<PrimExpr> indices;
          ffi::Array<tir::IterVar> reduce_axes;
          std::tie(indices, reduce_axes) = get_iter_vars(output, true);
          return tvm::sum(x(indices), reduce_axes);
        },
        "adaptive_pool_sum", "adaptive_pool_sum");

    return tvm::te::compute(
        out_shape,
        [&](const ffi::Array<Var>& output) {
          ffi::Array<PrimExpr> indices;
          ffi::Array<tir::IterVar> reduce_axes;
          std::tie(indices, reduce_axes) = get_iter_vars(output, false);

          PrimExpr divide_factor = tvm::cast(x->dtype, 1);
          for (size_t i = 0; i < n_dim; ++i) {
            divide_factor *= tvm::cast(DataType::Int(32), reduce_axes[i]->dom->extent);
          }

          return div(pool_sum(indices), divide_factor);
        },
        "adaptive_pool_avg", kElementWise, attrs);
  } else {
    LOG(ERROR) << "Unrecognized pool_type: " << pool_type;
    return x;
  }
}

/*!
 * \brief Adaptively perform pooling on the height and width dimensions of data.
 */
inline Tensor adaptive_pool(const Tensor& x, const ffi::Array<PrimExpr>& output_size,
                            PoolType pool_type, const std::string& layout = "NCHW") {
  int height_axis = -1, width_axis = -1;
  ICHECK(find_height_width(layout, &height_axis, &width_axis)) << "Unsupported layout " << layout;
  return adaptive_pool_impl(x, output_size, pool_type, {height_axis, width_axis});
}
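
// Example (illustrative): adaptive average pooling of an NCHW tensor down to a
// fixed 7x7 spatial size, independent of the input resolution.
//
//   te::Tensor x = te::placeholder({1, 512, 13, 13}, DataType::Float(32), "x");
//   te::Tensor y = adaptive_pool(x, {7, 7}, kAvgPool, "NCHW");  // (1, 512, 7, 7)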

/*!
 * \brief Adaptively perform pooling on three-dimensional data.
 *        See the two-dimensional version above for details.
 */
inline Tensor adaptive_pool3d(const Tensor& x, const ffi::Array<PrimExpr>& output_size,
                              PoolType pool_type, const std::string& layout = "NCDHW") {
  int depth_axis = -1, height_axis = -1, width_axis = -1;
  ICHECK(find_depth_height_width(layout, &depth_axis, &height_axis, &width_axis))
      << "Unsupported layout " << layout;
  return adaptive_pool_impl(x, output_size, pool_type, {depth_axis, height_axis, width_axis});
}

/*!
 * \brief Adaptively perform pooling on one-dimensional data.
 *        See the two-dimensional version above for details.
 */
inline Tensor adaptive_pool1d(const Tensor& x, const ffi::Array<PrimExpr>& output_size,
                              PoolType pool_type, const std::string& layout = "NCW") {
  int width_axis = -1;
  ICHECK(find_width(layout, &width_axis)) << "Unsupported layout " << layout;
  return adaptive_pool_impl(x, output_size, pool_type, {width_axis});
}

/*!
 * \brief Perform global pooling on the height and width dimensions of data.
 */
inline Tensor global_pool(const Tensor& x, PoolType pool_type, const std::string& layout = "NCHW") {
  return adaptive_pool(x, ffi::Array<PrimExpr>{1, 1}, pool_type, layout);
}

/*!
 * \brief Perform pooling on the N dimensions of data given by \p axis.
 */
inline Tensor pool_impl_nd(const Tensor& x, const ffi::Array<PrimExpr>& kernel_size,
                           const ffi::Array<PrimExpr>& stride_size,
                           const ffi::Array<PrimExpr>& dilation_size,
                           const ffi::Array<PrimExpr>& padding_size, PoolType pool_type,
                           bool ceil_mode, const std::vector<int>& axis, bool count_include_pad) {
  int k_size = kernel_size.size();
  int x_size = x->shape.size();
  ICHECK_EQ(stride_size.size(), k_size)
      << "Pooling stride_size must have the same number of elements as the kernel";
  ICHECK_EQ(padding_size.size(), k_size * 2)
      << "Pooling padding_size must have twice as many elements as the kernel";
  ICHECK_EQ(axis.size(), k_size) << "axis must have the same number of elements as the kernel";

  ffi::Array<IterVar> daxis;
  std::vector<PrimExpr> kernel(k_size);
  std::vector<PrimExpr> stride(k_size);
  std::vector<PrimExpr> dilation(k_size);
  std::vector<PrimExpr> pad_head(k_size);
  std::vector<PrimExpr> pad_tail(k_size);
  std::vector<PrimExpr> offset(k_size, 0);
  ffi::Array<PrimExpr> pad_before(std::vector<PrimExpr>(x_size, 0));
  ffi::Array<PrimExpr> pad_after(std::vector<PrimExpr>(x_size, 0));
  ffi::Array<PrimExpr> data_shape = x->shape;
  ffi::Array<PrimExpr> out_shape = data_shape;

  bool do_pad = false;
  for (int i = 0; i < k_size; i++) {
    int ii = axis[i];
    kernel[i] = kernel_size[i];
    stride[i] = stride_size[i];
    dilation[i] = dilation_size[i];
    pad_head[i] = padding_size[i];
    pad_tail[i] = padding_size[i + k_size];

    if (ceil_mode) {
      // offset[i] is additional padding that ensures we do ceil instead of floor
      // when dividing by stride. When ceil_mode=true and count_include_pad=true,
      // offset[i] is also needed later to eliminate this extra padding so that
      // the correct window boundary is obtained.
      offset[i] = stride[i] - 1;
      pad_tail[i] += offset[i];
    }

    const int64_t* padding0 = as_const_int(pad_head[i]);
    const int64_t* padding1 = as_const_int(pad_tail[i]);
    do_pad = do_pad || (padding0 && *padding0) || (padding1 && *padding1);

    daxis.push_back(tvm::te::reduce_axis(Range(0, kernel[i]), "rv" + std::to_string(i)));

    pad_before.Set(ii, pad_head[i]);
    pad_after.Set(ii, pad_tail[i]);

    arith::Analyzer analyzer;

    PrimExpr numerator =
        data_shape[ii] - (kernel[i] - 1) * dilation[i] - 1 + pad_head[i] + pad_tail[i];
    auto out_dim = analyzer.Simplify(indexdiv(numerator, stride[i]) + 1);
    out_shape.Set(ii, out_dim);
  }

  ffi::Map<ffi::String, ffi::Any> attrs;
  if (pool_type == kMaxPool) {
    auto temp = do_pad ? pad(x, pad_before, pad_after, tvm::min_value(x->dtype), "pad_temp") : x;
    attrs.Set("schedule_rule", tvm::ffi::String("meta_schedule.pool_max"));
    return tvm::te::compute(
        out_shape,
        [&](const ffi::Array<Var>& output) {
          ffi::Array<PrimExpr> indices;
          for (const Var& var : output) indices.push_back(var);

          for (int i = 0; i < k_size; i++) {
            int ii = axis[i];
            indices.Set(ii, output[ii] * stride[i] + daxis[i] * dilation[i]);
          }
          return tvm::max(temp(indices), daxis);
        },
        "pool_max", "pool_max", attrs);
  } else if (pool_type == kAvgPool) {
    attrs.Set("schedule_rule", tvm::ffi::String("meta_schedule.pool_avg"));
    // Pad the inputs.
    auto temp = do_pad ? pad(x, pad_before, pad_after, 0, "pad_temp") : x;

    // TVM compute for summing the pooling window.
    auto pool_sum = tvm::te::compute(
        out_shape,
        [&](const ffi::Array<Var>& output) {
          ffi::Array<PrimExpr> indices;
          for (const Var& var : output) indices.push_back(var);

          for (int i = 0; i < k_size; i++) {
            int ii = axis[i];
            indices.Set(ii, output[ii] * stride[i] + daxis[i] * dilation[i]);
          }
          return tvm::sum(temp(indices), daxis);
        },
        "pool_sum", "pool_sum");

    // TVM compute for dividing the reduced window sum by kernel size.
    return tvm::te::compute(
        out_shape,
        [&](const ffi::Array<Var>& output) {
          ffi::Array<PrimExpr> indices;
          for (const Var& var : output) indices.push_back(var);
          if (count_include_pad) {
            std::vector<PrimExpr> start(k_size);
            std::vector<PrimExpr> end(k_size);
            auto num_el = make_const(DataType::Int(32), 1);
            for (int i = 0; i < k_size; i++) {
              int ii = axis[i];
              start[i] = output[ii] * stride[i] - pad_head[i];
              // When computing the output shape in ceil_mode we added the extra
              // padding of offset[i], so to recover the correct boundary here we
              // need to subtract offset[i] again.
              end[i] = start[i] + (kernel[i] - 1) * dilation[i];
              end[i] = min(end[i], data_shape[ii] + pad_tail[i] - 1 - offset[i]);
              num_el *= (end[i] - start[i]) / dilation[i] + 1;
            }
            return div(pool_sum(indices), num_el);
          } else {
            std::vector<PrimExpr> start(k_size);
            std::vector<PrimExpr> end(k_size);
            auto num_el = make_const(DataType::Int(32), 1);
            for (int i = 0; i < k_size; i++) {
              int ii = axis[i];

              // Let start and end contain the first and last index of our Tensor
              // along the relevant dimension used in our calculation.
              // Assume indices -1, -2 represent the padding before (head) and
              // len(arr), len(arr) + 1 represent the padding after (tail).
              start[i] = output[ii] * stride[i] - pad_head[i];
              end[i] = start[i] + (kernel[i] - 1) * dilation[i];

              // If start[i] < 0, i.e. we start on a head-padded element, this is
              // the (positive) number of steps along the dilated kernel needed to
              // reach the first non-padded value; otherwise it is 0.
              PrimExpr jumps_to_non_pad = (dilation[i] - 1 - start[i]) / dilation[i];
              jumps_to_non_pad = max(jumps_to_non_pad, make_const(jumps_to_non_pad.dtype(), 0));
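              // Numeric illustration: with dilation = 2 and start = -3, the
              // dilated taps fall at -3, -1, 1, 3, ...; jumps_to_non_pad =
              // (2 - 1 - (-3)) / 2 = 2, and start + 2 * dilation = 1 is indeed
              // the first in-bounds tap.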

              end[i] = min(end[i], data_shape[ii] - 1);
              num_el *= (end[i] - (start[i] + dilation[i] * jumps_to_non_pad)) / dilation[i] + 1;
            }

            PrimExpr divide_factor = max(num_el, make_const(DataType::Int(32), 1));
            return div(pool_sum(indices), divide_factor);
          }
        },
        "pool_avg", kElementWise, attrs);
  } else {
    LOG(ERROR) << "Unrecognized pool_type: " << pool_type;
    return x;
  }
}

/*!
 * \brief Perform pooling on the width dimension of data.
 *        The width axis is determined by the layout string.
 */
inline Tensor pool1d(const Tensor& x, const ffi::Array<PrimExpr>& kernel_size,
                     const ffi::Array<PrimExpr>& stride_size,
                     const ffi::Array<PrimExpr>& dilation_size,
                     const ffi::Array<PrimExpr>& padding_size, PoolType pool_type, bool ceil_mode,
                     const std::string& layout = "NCW", bool count_include_pad = true) {
  int width_axis = -1;
  ICHECK(find_width(layout, &width_axis)) << "Unsupported layout " << layout;
  std::vector<int> axis = {width_axis};
  return pool_impl_nd(x, kernel_size, stride_size, dilation_size, padding_size, pool_type,
                      ceil_mode, axis, count_include_pad);
}

/*!
 * \brief Perform pooling on the height and width dimensions of data.
 *        The height and width axes are determined by the layout string.
 */
inline Tensor pool2d(const Tensor& x, const ffi::Array<PrimExpr>& kernel_size,
                     const ffi::Array<PrimExpr>& stride_size,
                     const ffi::Array<PrimExpr>& dilation_size,
                     const ffi::Array<PrimExpr>& padding_size, PoolType pool_type, bool ceil_mode,
                     const std::string& layout = "NCHW", bool count_include_pad = true) {
  int height_axis = -1, width_axis = -1;
  ICHECK(find_height_width(layout, &height_axis, &width_axis)) << "Unsupported layout " << layout;
  std::vector<int> axis = {height_axis, width_axis};
  return pool_impl_nd(x, kernel_size, stride_size, dilation_size, padding_size, pool_type,
                      ceil_mode, axis, count_include_pad);
}
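
// Example (illustrative): a 3x3 average pool with stride 1, dilation 1 and
// symmetric padding 1, which preserves the spatial shape of an NCHW tensor
// since (56 - 3 + 1 + 1) / 1 + 1 = 56.
//
//   te::Tensor x = te::placeholder({1, 64, 56, 56}, DataType::Float(32), "x");
//   te::Tensor y = pool2d(x, {3, 3}, {1, 1}, {1, 1}, {1, 1, 1, 1}, kAvgPool,
//                         /*ceil_mode=*/false, "NCHW", /*count_include_pad=*/false);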

/*!
 * \brief Perform pooling on the depth, height and width dimensions of data.
 *        The depth, height and width axes are determined by the layout string.
 */
inline Tensor pool3d(const Tensor& x, const ffi::Array<PrimExpr>& kernel_size,
                     const ffi::Array<PrimExpr>& stride_size,
                     const ffi::Array<PrimExpr>& dilation_size,
                     const ffi::Array<PrimExpr>& padding_size, PoolType pool_type, bool ceil_mode,
                     const std::string& layout = "NCDHW", bool count_include_pad = true) {
  int depth_axis = -1, height_axis = -1, width_axis = -1;
  ICHECK(find_depth_height_width(layout, &depth_axis, &height_axis, &width_axis))
      << "Unsupported layout " << layout;
  std::vector<int> axis = {depth_axis, height_axis, width_axis};
  return pool_impl_nd(x, kernel_size, stride_size, dilation_size, padding_size, pool_type,
                      ceil_mode, axis, count_include_pad);
}

}  // namespace nn
}  // namespace topi
}  // namespace tvm
#endif  // TVM_TOPI_NN_POOLING_H_