Loop - 13 vs 16#
The next section compares an older to a newer version of the same operator after both definitions are converted into markdown text. Green means an addition to the newer version, red means a deletion. Anything else is unchanged.
- Loop13 → Loop16 +1 -36
Loop13 → Loop16
RENAMED
@@ -1 +1 @@
|
|
1
1
|
Generic Looping construct. This loop has multiple termination conditions:
|
2
2
|
1) Trip count. Iteration count specified at runtime. Set by
|
3
3
|
specifying the input M. Optional. Set to empty string to omit.
|
4
4
|
Note that a static trip count (specified at graph construction time) can be
|
5
5
|
specified by passing in a constant node for input M.
|
6
6
|
2) Loop termination condition. This is an input to the op that determines
|
7
7
|
whether to run the first iteration and also a loop-carried dependency for
|
8
8
|
the body graph. The body graph must yield a value for the condition variable,
|
9
9
|
whether this input is provided or not.
|
10
10
|
This table summarizes the operating modes of this operator with equivalent
|
11
11
|
C-style code:
|
12
12
|
Operator inputs defined as (max_trip_count, condition_var).
|
13
13
|
input ("", ""):
|
14
14
|
for (int i=0; ; ++i) {
|
15
15
|
cond = ... // Note this value is ignored, but is required in the body
|
16
16
|
}
|
17
17
|
input ("", cond) // Note this is analogous to a while loop
|
18
18
|
bool cond = ...;
|
19
19
|
for (int i=0; cond; ++i) {
|
20
20
|
cond = ...;
|
21
21
|
}
|
22
22
|
input ("", 1) // Note this is analogous to a do-while loop
|
23
23
|
bool cond = true
|
24
24
|
for (int i=0; cond; ++i) {
|
25
25
|
cond = ...;
|
26
26
|
}
|
27
27
|
input (trip_count, "") // Note this is analogous to a for loop
|
28
28
|
int trip_count = ...
|
29
29
|
for (int i=0; i < trip_count; ++i) {
|
30
30
|
cond = ...; // ignored
|
31
31
|
}
|
32
32
|
input (trip_count, cond)
|
33
33
|
int trip_count = ...;
|
34
34
|
bool cond = ...;
|
35
35
|
for (int i=0; i < trip_count && cond; ++i) {
|
36
36
|
cond = ...;
|
37
37
|
}
|
38
38
|
*Sample usage - cond as well as trip count*
|
39
39
|
graph predict-net {
|
40
40
|
%a = Constant[value = <Scalar Tensor [3]>]()
|
41
41
|
%b = Constant[value = <Scalar Tensor [6]>]()
|
42
42
|
%keepgoing = Constant[value = <Scalar Tensor [1]>]()
|
43
43
|
%max_trip_count = Constant[value = <Scalar Tensor [10]>]()
|
44
44
|
%keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph body-net>](%max_trip_count, %keepgoing, %b)
|
45
45
|
return
|
46
46
|
}
|
47
47
|
graph body-net (
|
48
48
|
%i[INT32, scalar] // iteration number
|
49
49
|
%keepgoing_in[BOOL, scalar] // incoming loop-termination-condition; not used
|
50
50
|
%b_in[INT32, scalar] // incoming value of loop-carried-dependency b
|
51
51
|
) {
|
52
52
|
%my_local = Add(%a, %b_in)
|
53
53
|
%b_out = Sub(%a, %b_in) // outgoing value of loop-carried-dependency b
|
54
54
|
%keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-termination-condition
|
55
55
|
%user_defined_val = Add(%b_in, %b_in) // scan-output value to be accumulated
|
56
56
|
return %keepgoing_out, %b_out, %user_defined_val
|
57
57
|
}
|
58
58
|
*Sample equivalent C code*
|
59
59
|
{
|
60
60
|
/* User-defined code (enclosing scope) */
|
61
61
|
int a = 3, b = 6;
|
62
62
|
bool keepgoing = true; // Analogous to input cond
|
63
63
|
/* End user-defined code */
|
64
64
|
/* Implicitly-defined code */
|
65
65
|
const int max_trip_count = 10; // Analogous to input M
|
66
66
|
int user_defined_vals[]; // Imagine this is resizable
|
67
67
|
/* End implicitly-defined code */
|
68
68
|
/* initialize loop-carried variables and scan-output variables */
|
69
69
|
bool keepgoing_out = keepgoing
|
70
70
|
int b_out = b
|
71
71
|
for (int i=0; i < max_trip_count && keepgoing_out; ++i) {
|
72
72
|
/* Implicitly-defined code: bind actual parameter values
|
73
73
|
to formal parameter variables of loop-body */
|
74
74
|
bool keepgoing_in = keepgoing_out;
|
75
75
|
bool b_in = b_out;
|
76
76
|
/* User-defined code (loop body) */
|
77
77
|
int my_local = a + b_in; // Reading value "a" from the enclosing scope is fine
|
78
78
|
b_out = a - b_in;
|
79
79
|
keepgoing_out = my_local > b_out;
|
80
80
|
user_defined_val = b_in + b_in; // b_in and b_out are different variables
|
81
81
|
/* End user-defined code */
|
82
82
|
/* Implicitly defined-code */
|
83
83
|
user_defined_vals[i] = user_defined_val // accumulate scan-output values
|
84
84
|
}
|
85
85
|
// int t = my_local; // Can't do this. my_local is not accessible here.
|
86
86
|
// The values below are bound to the output variables of the loop and therefore accessible
|
87
87
|
// b_out; user_defined_vals; keepgoing_out;
|
88
88
|
}
|
89
89
|
There are several things of note in this code snippet:
|
90
90
|
1) Values from the enclosing scope (i.e. variable "a" here) are in scope and can
|
91
91
|
be referenced in the inputs of the loop.
|
92
92
|
2) Any values computed in the loop body that need to be used in a subsequent
|
93
93
|
iteration or after the loop are modelled using a pair of variables in the loop-body,
|
94
94
|
consisting of an input variable (eg., b_in) and an output variable (eg., b_out).
|
95
95
|
These are referred to as loop-carried dependences. The loop operation node
|
96
96
|
supplies the input value of the input variable for the first iteration, and
|
97
97
|
returns the output value of the output variable produced by the final
|
98
98
|
iteration.
|
99
99
|
3) Scan_output variables are used to implicitly concatenate values computed across
|
100
100
|
all the iterations. In the above example, the value of user_defined_val computed
|
101
101
|
over all iterations is concatenated and returned as the value of user_defined_vals
|
102
102
|
after the loop.
|
103
103
|
4) Values created in the body cannot be accessed in the enclosing scope,
|
104
104
|
except using the mechanism described above.
|
105
105
|
Note that the semantics of this op support "diagonal" or "wavefront" execution.
|
106
106
|
(See Step 3 here for an example:
|
107
107
|
https://devblogs.nvidia.com/optimizing-recurrent-neural-networks-cudnn-5/).
|
108
108
|
Frontends should emit multi-layer RNNs as a series of While operators (with
|
109
109
|
time being the inner looping dimension), with each successive layer consuming
|
110
110
|
the scan_outputs from the previous layer, possibly going through several
|
111
111
|
point-wise operators (e.g. dropout, residual connections, linear layer).
|
112
112
|
The input/output of subgraph (produced by loop node) matching is based on order instead of name. The implementation will figure out the names based on this order.
|
113
113
|
**Attributes**
|
114
114
|
* **body** (required):
|
115
115
|
The graph run each iteration. It has 2+N inputs: (iteration_num,
|
116
116
|
condition, loop carried dependencies...). It has 1+N+K outputs:
|
117
117
|
(condition, loop carried dependencies..., scan_outputs...). Each
|
118
118
|
scan_output is created by concatenating the value of the specified
|
119
119
|
output value at the end of each iteration of the loop. It is an
|
120
120
|
error if the dimensions or data type of these scan_outputs change
|
121
121
|
across loop iterations.
|
122
122
|
**Inputs**
|
123
123
|
Between 2 and 2147483647 inputs.
|
124
124
|
* **M** (optional, heterogeneous) - **I**:
|
125
125
|
A maximum trip-count for the loop specified at runtime. Optional.
|
126
126
|
Pass empty string to skip.
|
127
127
|
* **cond** (optional, heterogeneous) - **B**:
|
128
128
|
A boolean termination condition. Optional. Pass empty string to
|
129
129
|
skip.
|
130
130
|
* **v_initial** (variadic) - **V**:
|
131
131
|
The initial values of any loop-carried dependencies (values that
|
132
132
|
change across loop iterations)
|
133
133
|
**Outputs**
|
134
134
|
Between 1 and 2147483647 outputs.
|
135
135
|
* **v_final_and_scan_outputs** (variadic) - **V**:
|
136
136
|
Final N loop carried dependency values then K scan_outputs. Scan
|
137
137
|
outputs must be Tensors.
|
138
138
|
**Type Constraints**
|
139
139
|
* **V** in (
|
140
|
-
optional(seq(tensor(bfloat16))),
|
141
|
-
optional(seq(tensor(bool))),
|
142
|
-
optional(seq(tensor(complex128))),
|
143
|
-
optional(seq(tensor(complex64))),
|
144
|
-
optional(seq(tensor(double))),
|
145
|
-
optional(seq(tensor(float))),
|
146
|
-
optional(seq(tensor(float16))),
|
147
|
-
optional(seq(tensor(int16))),
|
148
|
-
optional(seq(tensor(int32))),
|
149
|
-
optional(seq(tensor(int64))),
|
150
|
-
optional(seq(tensor(int8))),
|
151
|
-
optional(seq(tensor(string))),
|
152
|
-
optional(seq(tensor(uint16))),
|
153
|
-
optional(seq(tensor(uint32))),
|
154
|
-
optional(seq(tensor(uint64))),
|
155
|
-
optional(seq(tensor(uint8))),
|
156
|
-
optional(tensor(bfloat16)),
|
157
|
-
optional(tensor(bool)),
|
158
|
-
optional(tensor(complex128)),
|
159
|
-
optional(tensor(complex64)),
|
160
|
-
optional(tensor(double)),
|
161
|
-
optional(tensor(float)),
|
162
|
-
optional(tensor(float16)),
|
163
|
-
optional(tensor(int16)),
|
164
|
-
optional(tensor(int32)),
|
165
|
-
optional(tensor(int64)),
|
166
|
-
optional(tensor(int8)),
|
167
|
-
optional(tensor(string)),
|
168
|
-
optional(tensor(uint16)),
|
169
|
-
optional(tensor(uint32)),
|
170
|
-
optional(tensor(uint64)),
|
171
|
-
optional(tensor(uint8)),
|
172
|
-
seq(tensor(bfloat16)),
|
173
140
|
seq(tensor(bool)),
|
174
141
|
seq(tensor(complex128)),
|
175
142
|
seq(tensor(complex64)),
|
176
143
|
seq(tensor(double)),
|
177
144
|
seq(tensor(float)),
|
178
145
|
seq(tensor(float16)),
|
179
146
|
seq(tensor(int16)),
|
180
147
|
seq(tensor(int32)),
|
181
148
|
seq(tensor(int64)),
|
182
149
|
seq(tensor(int8)),
|
183
150
|
seq(tensor(string)),
|
184
151
|
seq(tensor(uint16)),
|
185
152
|
seq(tensor(uint32)),
|
186
153
|
seq(tensor(uint64)),
|
187
154
|
seq(tensor(uint8)),
|
188
|
-
tensor(bfloat16),
|
189
155
|
tensor(bool),
|
190
156
|
tensor(complex128),
|
191
157
|
tensor(complex64),
|
192
158
|
tensor(double),
|
193
159
|
tensor(float),
|
194
160
|
tensor(float16),
|
195
161
|
tensor(int16),
|
196
162
|
tensor(int32),
|
197
163
|
tensor(int64),
|
198
164
|
tensor(int8),
|
199
165
|
tensor(string),
|
200
166
|
tensor(uint16),
|
201
167
|
tensor(uint32),
|
202
168
|
tensor(uint64),
|
203
169
|
tensor(uint8)
|
204
170
|
):
|
171
|
+
All Tensor and Sequence types
|
205
|
-
All Tensor, Sequence(Tensor), Optional(Tensor), and
|
206
|
-
Optional(Sequence(Tensor)) types
|
207
172
|
* **I** in (
|
208
173
|
tensor(int64)
|
209
174
|
):
|
210
175
|
tensor of int64, which should be a scalar.
|
211
176
|
* **B** in (
|
212
177
|
tensor(bool)
|
213
178
|
):
|
214
179
|
tensor of bool, which should be a scalar.
|