This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8 | |
# Copyright (c) 2019-2020 NVIDIA CORPORATION. All rights reserved. | |
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
PrimFunc([argsort_gpu.v0, argsort_gpu.v2, argsort_gpu.v3, argsort_gpu.v1, i_0, any_dim]) attrs={"target": vulkan -keys=vulkan,gpu -max_num_threads=256, "tir.noalias": 1, "global_symbol": "fused_argsort_kernel2", "tir.device_thread_axis": [iter_var(threadIdx.x, , threadIdx.x), iter_var(blockIdx.x, , blockIdx.x), iter_var(blockIdx.y, , blockIdx.y), iter_var(blockIdx.z, , blockIdx.z)], "calling_conv": 2} { | |
// attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 256 | |
// attr [first] storage_scope = "local" | |
allocate first[int64 * 1] | |
// attr [last] storage_scope = "local" | |
allocate last[int64 * 1] | |
// attr [first] storage_scope = "local" | |
allocate first[int64 * 1] | |
// attr [last] storage_scope = "local" | |
allocate last[int64 * 1] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
type Option[A] { | |
Some(A), | |
None, | |
} | |
type static_tensor_float32_2_4_t { | |
tensor_nil_float32_2_4, | |
tensor_constructor_float32_2_4(Tensor[(2, 4), float32]), | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern "C" __global__ void fused_expand_dims_concatenate_1_kernel0(float* __restrict__ T_concat, float* __restrict__ placeholder, float* __restrict__ placeholder1, int any_dim, int stride, int stride1, int stride2, int stride3, int stride4) { | |
if (((int)blockIdx.x) < (((any_dim * 90000) + 90000) >> 9)) { | |
if ((((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) / 90000) < (any_dim + 1)) { | |
if ((((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) / 300) < ((any_dim * 300) + 300)) { | |
T_concat[(((((int)blockIdx.x) * 512) + ((int)threadIdx.x)))] = ((0 <= ((((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) / 90000) - any_dim)) ? placeholder[(((((((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) % 90000) / 300) * stride) + ((((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) % 300) * stride1)))] : placeholder1[(((((((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) / 90000) * stride2) + (((((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) % 90000) / 300) * stride3)) + ((((((int)blockIdx.x) * 512) + ((int)threadI |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
type Storage { | |
} | |
def @main(%data: Tensor[(1, 3, 224, 224), float32]) -> Tensor[(1, 1000), float32] { | |
let %storage_0: Storage[] = memory.alloc_storage(602112 /* ty=int64 */, 64 /* ty=int64 */, meta[relay.attrs.AllocStorageAttrs][0]) /* ty=Storage[] */; | |
let %tensor_0: Tensor[(1, 224, 224, 3), float32] = memory.alloc_tensor(%storage_0, 0 /* ty=int64 */, meta[relay.Constant][0] /* ty=Tensor[(4), int64] */, meta[relay.attrs.AllocTensorAttrs][0]) /* ty=Tensor[(1, 224, 224, 3), float32] */; | |
%2 = fn (%p0: Tensor[(1, 3, 224, 224), float32], %p1: Tensor[(3, 1, 1), float32], %p2: Tensor[(3, 1, 1), float32], Primitive=1) -> Tensor[(1, 224, 224, 3), float32] { | |
%0 = multiply(%p0, %p1) /* ty=Tensor[(1, 3, 224, 224), float32] */; | |
%1 = add(%0, %p2) /* ty=Tensor[(1, 3, 224, 224), float32] */; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def @main(%data: Tensor[(1, 3, 224, 224), float32]) -> Tensor[(1, 1000), float32] { | |
%2 = fn (%p0: Tensor[(1, 3, 224, 224), float32], %p1: Tensor[(3, 1, 1), float32], %p2: Tensor[(3, 1, 1), float32], Primitive=1) -> Tensor[(1, 224, 224, 3), float32] { | |
%0 = multiply(%p0, %p1) /* ty=Tensor[(1, 3, 224, 224), float32] */; | |
%1 = add(%0, %p2) /* ty=Tensor[(1, 3, 224, 224), float32] */; | |
layout_transform(%1, src_layout="NCHW", dst_layout="NHWC") /* ty=Tensor[(1, 224, 224, 3), float32] */ | |
}; | |
%3 = %2(%data, meta[relay.Constant][0] /* ty=Tensor[(3, 1, 1), float32] */, meta[relay.Constant][1] /* ty=Tensor[(3, 1, 1), float32] */) /* ty=Tensor[(1, 224, 224, 3), float32] */; | |
%6 = fn (%p01: Tensor[(1, 224, 224, 3), float32], %p11: Tensor[(7, 7, 3, 64), float32], %p21: Tensor[(1, 1, 1, 64), float32], Primitive=1) -> Tensor[(1, 112, 112, 64), float32] { | |
%4 = nn.conv2d(%p01, %p11, strides=[2, 2], padding=[3, 3, 3, 3], channels=64, kernel_size=[7, 7], data_layout="NHWC", kernel_layout="HWIO") /* ty=Tensor[( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
type Storage { | |
} | |
def @main(%data: Tensor[(1, 3, 224, 224), float32], %bn_data_gamma: Tensor[(3), float32], %bn_data_beta: Tensor[(3), float32], %bn_data_moving_mean: Tensor[(3), float32], %bn_data_moving_var: Tensor[(3), float32], %conv0_weight: Tensor[(64, 3, 7, 7), float32], %bn0_gamma: Tensor[(64), float32], %bn0_beta: Tensor[(64), float32], %bn0_moving_mean: Tensor[(64), float32], %bn0_moving_var: Tensor[(64), float32], %stage1_unit1_bn1_gamma: Tensor[(64), float32], %stage1_unit1_bn1_beta: Tensor[(64), float32], %stage1_unit1_bn1_moving_mean: Tensor[(64), float32], %stage1_unit1_bn1_moving_var: Tensor[(64), float32], %stage1_unit1_conv1_weight: Tensor[(64, 64, 1, 1), float32], %stage1_unit1_bn2_gamma: Tensor[(64), float32], %stage1_unit1_bn2_beta: Tensor[(64), float32], %stage1_unit1_bn2_moving_mean: Tensor[(64), float32], %stage1_unit1_bn2_moving_var: Tensor[(64), float32], %stage1_unit1_conv2_weight: Tensor[(64, 64, 3, 3), float32], %stage1_unit1_bn3_gamma: Tensor[(64), float32], %stage1_unit1_bn3_ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern "C" __global__ void fused_dyn_full_kernel0(float* __restrict__ T_full, int* __restrict__ placeholder, int any_dim, int any_dim1, int any_dim2, int any_dim3, int stride, int stride1, int stride2, int stride3) { | |
if (((int)blockIdx.x) < ((((any_dim * any_dim1) * any_dim2) * any_dim3) >> 9)) { | |
if (((((any_dim1 >= 0) && ((((((any_dim2 >= 0) && ((((((any_dim3 >= 0) && ((((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) % any_dim3) >= 0)) || ((any_dim3 < 0) && ((((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) % any_dim3) <= 0))) ? (((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) / any_dim3) : ((((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) / any_dim3) - 1)) % any_dim2) >= 0)) || ((any_dim2 < 0) && ((((((any_dim3 >= 0) && ((((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) % any_dim3) >= 0)) || ((any_dim3 < 0) && ((((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) % any_dim3) <= 0))) ? (((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) / any_dim3) : ((((((int)blockIdx.x) * 512) + ((int)threadIdx.x)) / a |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script preamble: set up the fixed input geometry for a torchvision
# detection-model trace/export run.
# NOTE(review): " | |" gutter artifacts from the web scrape were removed —
# the original lines were not valid Python with them attached.
import numpy as np
import cv2
import torch
import torchvision

# Square input resolution fed to the model (pixels per side).
in_size = 300
# NCHW input shape: batch=1, 3 color channels, in_size x in_size spatial dims.
input_shape = (1, 3, in_size, in_size)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = 'TVMMod' | |
source_filename = "TVMMod" | |
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | |
target triple = "x86_64-pc-linux-gnu" | |
%0 = type { double } | |
%1 = type { i8*, %2, i32, %3, i64*, i64*, i64 } | |
%2 = type { i32, i32 } | |
%3 = type { i8, i8, i16 } |
NewerOlder