Loop - 1 vs 11
The next section compares an older version of the operator to a newer version of the same operator after both definitions are converted into markdown text. Green means an addition to the newer version, red means a deletion. Anything else is unchanged.
- Loop1 → Loop11 +26 -46
Loop1 → Loop11
RENAMED
@@ -1 +1 @@
|
|
1
1
|
Generic Looping construct. This loop has multiple termination conditions:
|
2
2
|
1) Trip count. Iteration count specified at runtime. Set by
|
3
3
|
specifying the input M. Optional. Set to empty string to omit.
|
4
4
|
Note that a static trip count (specified at graph construction time) can be
|
5
5
|
specified by passing in a constant node for input M.
|
6
6
|
2) Loop termination condition. This is an input to the op that determines
|
7
7
|
whether to run the first iteration and also a loop-carried dependency for
|
8
8
|
the body graph. The body graph must yield a value for the condition variable,
|
9
9
|
whether this input is provided or not.
|
10
10
|
This table summarizes the operating modes of this operator with equivalent
|
11
11
|
C-style code:
|
12
12
|
Operator inputs defined as (max_trip_count, condition_var).
|
13
13
|
input ("", ""):
|
14
14
|
for (int i=0; ; ++i) {
|
15
15
|
cond = ... // Note this value is ignored, but is required in the body
|
16
16
|
}
|
17
17
|
input ("", cond) // Note this is analogous to a while loop
|
18
18
|
bool cond = ...;
|
19
19
|
for (int i=0; cond; ++i) {
|
20
20
|
cond = ...;
|
21
21
|
}
|
22
22
|
input ("", 1) // Note this is analogous to a do-while loop
|
23
23
|
bool cond = true
|
24
24
|
for (int i=0; cond; ++i) {
|
25
25
|
cond = ...;
|
26
26
|
}
|
27
27
|
input (trip_count, "") // Note this is analogous to a for loop
|
28
28
|
int trip_count = ...
|
29
29
|
for (int i=0; i < trip_count; ++i) {
|
30
30
|
cond = ...; // ignored
|
31
31
|
}
|
32
32
|
input (trip_count, cond)
|
33
33
|
int trip_count = ...;
|
34
34
|
bool cond = ...;
|
35
35
|
for (int i=0; i < trip_count && cond; ++i) {
|
36
36
|
cond = ...;
|
37
37
|
}
|
38
38
|
*Sample usage - cond as well as trip count*
|
39
39
|
graph predict-net {
|
40
40
|
%a = Constant[value = <Scalar Tensor [3]>]()
|
41
41
|
%b = Constant[value = <Scalar Tensor [6]>]()
|
42
42
|
%keepgoing = Constant[value = <Scalar Tensor [1]>]()
|
43
43
|
%max_trip_count = Constant[value = <Scalar Tensor [10]>]()
|
44
44
|
%keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph body-net>](%max_trip_count, %keepgoing, %b)
|
45
45
|
return
|
46
46
|
}
|
47
47
|
graph body-net (
|
48
|
-
%i[INT32, scalar]
|
48
|
+
%i[INT32, scalar]
|
49
|
-
%keepgoing_in[BOOL, scalar]
|
49
|
+
%keepgoing[BOOL, scalar]
|
50
|
-
%b_in[INT32, scalar]
|
50
|
+
%b[INT32, scalar]
|
51
51
|
) {
|
52
|
-
%my_local = Add(%a, %b_in)
|
52
|
+
%my_local = Add(%a, %b)
|
53
|
-
%b_out = Sub(%a, %b_in)
|
53
|
+
%b_out = Sub(%a, %b)
|
54
|
-
%keepgoing_out = Greater(%my_local, %b_out)
|
54
|
+
%keepgoing_out = Greater(%my_local, %b_out)
|
55
|
-
%user_defined_val = Add(%b_in, %b_in)
|
55
|
+
%user_defined_vals = Add(%b, %b)
|
56
|
-
return %keepgoing_out, %b_out, %user_defined_val
|
56
|
+
return %keepgoing_out, %b_out, %user_defined_vals
|
57
57
|
}
|
58
58
|
*Sample equivalent C code*
|
59
59
|
{
|
60
60
|
/* User-defined code (enclosing scope) */
|
61
61
|
int a = 3, b = 6;
|
62
62
|
bool keepgoing = true; // Analogous to input cond
|
63
63
|
/* End user-defined code */
|
64
64
|
/* Implicitly-defined code */
|
65
65
|
const int max_trip_count = 10; // Analogous to input M
|
66
66
|
int user_defined_vals[]; // Imagine this is resizable
|
67
67
|
/* End implicitly-defined code */
|
68
|
+
for (int i=0; i < max_trip_count && keepgoing; ++i) {
|
69
|
+
/* User-defined code (loop body) */
|
70
|
+
int my_local = a + b; // Reading values in the enclosing scope is fine
|
71
|
+
b = a - b; // writes fine if we specify b as a loop-carried dependency
|
72
|
+
keepgoing = my_local > b; // keepgoing is a loop-carried dependency
|
73
|
+
user_defined_vals[i] = b + b;
|
74
|
+
/* End user-defined code */
|
75
|
+
}
|
76
|
+
// my_local = 123; // Can't do this. my_local was defined in the body
|
68
|
-
|
77
|
+
// These below values are live-out from the loop and therefore accessible
|
69
|
-
bool keepgoing_out = keepgoing
|
70
|
-
int b_out = b
|
71
|
-
for (int i=0; i < max_trip_count && keepgoing_out; ++i) {
|
72
|
-
/* Implicitly-defined code: bind actual parameter values
|
73
|
-
to formal parameter variables of loop-body */
|
74
|
-
bool keepgoing_in = keepgoing_out;
|
75
|
-
bool b_in = b_out;
|
76
|
-
|
77
|
-
/* User-defined code (loop body) */
|
78
|
-
int my_local = a + b_in; // Reading value "a" from the enclosing scope is fine
|
79
|
-
b_out = a - b_in;
|
80
|
-
keepgoing_out = my_local > b_out;
|
81
|
-
user_defined_val = b_in + b_in; // b_in and b_out are different variables
|
82
|
-
/* End user-defined code */
|
83
|
-
|
84
|
-
/* Implicitly defined-code */
|
85
|
-
user_defined_vals[i] = user_defined_val // accumulate scan-output values
|
86
|
-
}
|
87
|
-
// int t = my_local; // Can't do this. my_local is not accessible here.
|
88
|
-
|
89
|
-
// The values below are bound to the output variables of the loop and therefore accessible
|
90
|
-
|
78
|
+
b_out; user_defined_vals; keepgoing_out;
|
91
79
|
}
|
92
80
|
There are several things of note in this code snippet:
|
93
|
-
1) Values from the enclosing scope (i.e. variable "a" here) are in scope and can
|
81
|
+
1) Values from the enclosing scope (i.e. variable a here) are in scope and can
|
94
82
|
be referenced in the inputs of the loop.
|
83
|
+
2) Any variables which you wish to make available in the enclosing scope (i.e.
|
84
|
+
the variables b and keepgoing) must be declared as either loop-carried
|
85
|
+
dependencies (both at the op inputs and output and at the body net input and
|
86
|
+
output) or scan_outputs.
|
95
|
-
2) Any values computed in the loop body that needs to be used in a subsequent
|
96
|
-
iteration or after the loop are modelled using a pair of variables in the loop-body,
|
97
|
-
consisting of an input variable (eg., b_in) and an output variable (eg., b_out).
|
98
|
-
These are referred to as loop-carried dependences. The loop operation node
|
99
|
-
supplies the input value of the input variable for the first iteration, and
|
100
|
-
returns the output value of the output variable produced by the final
|
101
|
-
iteration.
|
102
|
-
3) Scan_output variables are used to implicitly concatenate values computed across
|
103
|
-
all the iterations. In the above example, the value of user_defined_val computed
|
104
|
-
over all iterations are concatenated and returned as the value of user_defined_vals
|
105
|
-
after the loop.
|
106
|
-
|
87
|
+
3) Values created in the body cannot be accessed in the enclosing scope.
|
107
|
-
except using the mechanism described above.
|
108
88
|
Note that the semantics of this op support "diagonal" or "wavefront" execution.
|
109
89
|
(See Step 3 here for an example:
|
110
90
|
https://devblogs.nvidia.com/optimizing-recurrent-neural-networks-cudnn-5/).
|
111
91
|
Frontends should emit multi-layer RNNs as a series of While operators (with
|
112
92
|
time being the inner looping dimension), with each successive layer consuming
|
113
93
|
the scan_outputs from the previous layer, possibly going through several
|
114
94
|
point-wise operators (e.g. dropout, residual connections, linear layer).
|
115
95
|
**Attributes**
|
116
96
|
* **body** (required):
|
117
97
|
The graph run each iteration. It has 2+N inputs: (iteration_num,
|
118
98
|
condition, loop carried dependencies...). It has 1+N+K outputs:
|
119
99
|
(condition, loop carried dependencies..., scan_outputs...). Each
|
120
100
|
scan_output is created by concatenating the value of the specified
|
121
101
|
output value at the end of each iteration of the loop. It is an
|
122
102
|
error if the dimensions or data type of these scan_outputs change
|
123
103
|
across loop iterations.
|
124
104
|
**Inputs**
|
125
|
-
Between 2 and 2147483647 inputs.
|
105
|
+
Between 3 and 2147483647 inputs.
|
126
106
|
* **M** (optional, heterogeneous) - **I**:
|
127
107
|
A maximum trip-count for the loop specified at runtime. Optional.
|
128
108
|
Pass empty string to skip.
|
129
109
|
* **cond** (optional, heterogeneous) - **B**:
|
130
110
|
A boolean termination condition. Optional. Pass empty string to
|
131
111
|
skip.
|
132
112
|
* **v_initial** (variadic) - **V**:
|
133
113
|
The initial values of any loop-carried dependencies (values that
|
134
114
|
change across loop iterations)
|
135
115
|
**Outputs**
|
136
116
|
Between 1 and 2147483647 outputs.
|
137
117
|
* **v_final_and_scan_outputs** (variadic) - **V**:
|
138
118
|
Final N loop carried dependency values then K scan_outputs
|
139
119
|
**Type Constraints**
|
140
120
|
* **V** in (
|
141
121
|
tensor(bool),
|
142
122
|
tensor(complex128),
|
143
123
|
tensor(complex64),
|
144
124
|
tensor(double),
|
145
125
|
tensor(float),
|
146
126
|
tensor(float16),
|
147
127
|
tensor(int16),
|
148
128
|
tensor(int32),
|
149
129
|
tensor(int64),
|
150
130
|
tensor(int8),
|
151
131
|
tensor(string),
|
152
132
|
tensor(uint16),
|
153
133
|
tensor(uint32),
|
154
134
|
tensor(uint64),
|
155
135
|
tensor(uint8)
|
156
136
|
):
|
157
137
|
All Tensor types
|
158
138
|
* **I** in (
|
159
139
|
tensor(int64)
|
160
140
|
):
|
161
141
|
tensor of int64, which should be a scalar.
|
162
142
|
* **B** in (
|
163
143
|
tensor(bool)
|
164
144
|
):
|
165
145
|
tensor of bool, which should be a scalar.
|