-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpulp_conv2d_fp32.h
159 lines (143 loc) · 7.46 KB
/
pulp_conv2d_fp32.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
/*
* Copyright (C) 2021-2022 ETH Zurich and University of Bologna
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Authors: Davide Nadalini, Leonardo Ravaglia
*/
/**
* 2D Convolution layer configuration structure
*/
/**
* @brief Structure for 2D Convolution Training in FP32
* @param input input feature maps for the conv2d layer
* @param coeff weight matrix
* @param output output feature maps for the conv2d layer
* @param Lpad left padding
* @param Rpad right padding
* @param Upad upper padding
* @param Dpad lower padding
* @param stride_w stride in input width
* @param stride_h stride in input height
* @param i2c_buffer pointer to the im2col buffer
* @param bt_buffer pointer to the blocktranspose buffer (to compute input gradients)
* @param skip_in_grad skips the computation of the input grad (1st DNN layer)
* @param HWC tells the 2D Convolution if the input/output tensor is in CHW layout (HWC=0) or HWC format (HWC=1)
* @param opt_matmul_type_fw number of the optimizer matmul to be chosen by the mm_manager for the forward primitive (see mm_manager_list.txt)
* @param opt_matmul_type_wg number of the optimizer matmul to be chosen by the mm_manager for the weight gradient primitive (see mm_manager_list.txt)
* @param opt_matmul_type_ig number of the optimizer matmul to be chosen by the mm_manager for the input gradient primitive (see mm_manager_list.txt)
* @param USE_IM2COL if set to 0, the convd kernel calls for the naive implementation, if set to 1 for the im2col+matmul optimized execution
* @param USE_DMA_IM2COL in case the primitive uses IM2COL + MM, select if to perform im2col using DMA-managed transfers from L2 to L1 (input and output gradient tensors need to be stored in L2, im2col_buffer in L1)
*/
struct Conv2D_args {
struct blob * input;
struct blob * coeff;
struct blob * output;
int Lpad;
int Rpad;
int Upad;
int Dpad;
int stride_h;
int stride_w;
float * i2c_buffer;
float * bt_buffer;
int skip_in_grad;
int HWC;
int opt_matmul_type_fw;
int opt_matmul_type_wg;
int opt_matmul_type_ig;
int USE_IM2COL;
int USE_DMA_IM2COL;
};
/**
* Convolutional layer training functions, grouped into FW and BW
*/
// FORWARD FUNCTIONS
/**
* @brief Forward pass function, forked on PULP cluster.
* @param input input feauture maps for the conv2d layer
* @param coeff weight matrix
* @param output output feature maps for the conv2d layer
* @param Lpad left padding
* @param Rpad right padding
* @param Upad upper padding
* @param Dpad lower padding
* @param stride_w stride in input width
* @param stride_h stride in input height
* @param i2c_buffer pointer to the im2col buffer
* @param HWC tells the 2D Convolution if the input tensor is in CHW layout (HWC=0) or HWC format (HWC=1)
* @param opt_matmul_type_fw number of the optimizer matmul to be chosen by the mm_manager (see mm_manager_list.txt)
* @param USE_IM2COL if set to 0, the convd kernel calls for the naive implementation, if set to 1 for the im2col+matmul optimized execution
* @param USE_DMA_IM2COL in case the primitive uses IM2COL + MM, select if to perform im2col using DMA-managed transfers from L2 to L1 (input tensor needs to be stored in L2, im2col_buffer in L1)
*/
void pulp_conv2d_fp32_fw_cl( void * Conv2D_args );
// BACKWARD FUNCTIONS
/**
* @brief Backward pass function, which internally calls both weight gradient and input gradient calculation
* @param input input feauture maps for the conv2d layer
* @param coeff weight matrix
* @param output output feature maps for the conv2d layer
* @param Lpad left padding
* @param Rpad right padding
* @param Upad upper padding
* @param Dpad lower padding
* @param stride_w stride in input width
* @param stride_h stride in input height
* @param i2c_buffer pointer to the im2col buffer
* @param bt_buffer pointer to the blocktranspose buffer (to compute input gradients)
* @param skip_in_grad skips the computation of the input grad (1st DNN layer)
* @param HWC tells the 2D Convolution if the input/output tensor is in CHW layout (HWC=0) or HWC format (HWC=1)
* @param opt_matmul_type_wg number of the optimizer matmul to be chosen by the mm_manager for the weight gradient primitive (see mm_manager_list.txt)
* @param opt_matmul_type_ig number of the optimizer matmul to be chosen by the mm_manager for the input gradient primitive (see mm_manager_list.txt)
* @param USE_IM2COL if set to 0, the convd kernel calls for the naive implementation, if set to 1 for the im2col+matmul optimized execution
* @param USE_DMA_IM2COL in case the primitive uses IM2COL + MM, select if to perform im2col using DMA-managed transfers from L2 to L1 (input and output gradient tensors need to be stored in L2, im2col_buffer in L1)
*/
void pulp_conv2d_fp32_bw_cl( void * Conv2D_args );
/**
* @brief Backward pass function which computes weight's gradient only
* @param input input feauture maps for the conv2d layer
* @param coeff weight matrix
* @param output output feature maps for the conv2d layer
* @param Lpad left padding
* @param Rpad right padding
* @param Upad upper padding
* @param Dpad lower padding
* @param stride_w stride in input width
* @param stride_h stride in input height
* @param i2c_buffer pointer to the im2col buffer
* @param HWC tells the 2D Convolution if the input tensor is in CHW layout (HWC=0) or HWC format (HWC=1)
* @param opt_matmul_type_wg number of the optimizer matmul to be chosen by the mm_manager (see mm_manager_list.txt)
* @param USE_IM2COL if set to 0, the convd kernel calls for the naive implementation, if set to 1 for the im2col+matmul optimized execution
* @param USE_DMA_IM2COL in case the primitive uses IM2COL + MM, select if to perform im2col using DMA-managed transfers from L2 to L1 (input tensor needs to be stored in L2, im2col_buffer in L1)
*/
void pulp_conv2d_fp32_bw_param_grads_cl( void * Conv2D_args );
/**
* @brief Backward pass function which computes input's gradient only
* @param input input feauture maps for the conv2d layer
* @param coeff weight matrix
* @param output output feature maps for the conv2d layer
* @param Lpad left padding
* @param Rpad right padding
* @param Upad upper padding
* @param Dpad lower padding
* @param stride_w stride in input width
* @param stride_h stride in input height
* @param i2c_buffer pointer to the im2col buffer
* @param bt_buffer pointer to the blocktranspose buffer (to reshape the weights for the in grad step)
* @param HWC tells the 2D Convolution if the output tensor is in CHW layout (HWC=0) or HWC format (HWC=1)
* @param opt_matmul_type_ig number of the optimizer matmul to be chosen by the mm_manager (see mm_manager_list.txt)
* @param USE_IM2COL if set to 0, the convd kernel calls for the naive implementation, if set to 1 for the im2col+matmul optimized execution
* @param USE_DMA_IM2COL in case the primitive uses IM2COL + MM, select if to perform im2col using DMA-managed transfers from L2 to L1 (output gradient tensor needs to be stored in L2, im2col_buffer in L1)
*/
void pulp_conv2d_fp32_bw_input_grads_cl( void * Conv2D_args );