156156@functor Dense
157157
function (a::Dense)(x::AbstractVecOrMat)
  # Swap the stored activation for a faster equivalent when one exists
  # (e.g. tanh => tanh_fast); `fast_act` is a no-op otherwise.
  act = NNlib.fast_act(a.σ, x)
  # Fields are read directly (no local destructuring) so the broadcast
  # fuses the affine map and the activation into one pass.
  return act.(a.weight * x .+ a.bias)
end
163162
164163(a:: Dense )(x:: AbstractArray ) =
@@ -172,35 +171,37 @@ function Base.show(io::IO, l::Dense)
172171end
173172
"""
    Diagonal(size::Integer...; σ = identity, bias=true, init=ones32)
    Diagonal(scale::AbstractArray, [bias, activation])

Create an element-wise linear layer, which performs

    y = σ.(scale .* x .+ bias)

The learnable scale & bias are initialised `init(size...)` and `zeros32(size...)`,
with `init=ones32` by default. You may specify the function `init`,
turn off trainable bias with `bias=false`, or provide the array(s) explicitly.

Used by [`LayerNorm`](@ref).
"""
struct Diagonal{A<:AbstractArray, B, F}
  scale::A   # learnable element-wise scale
  bias::B    # learnable bias array, or `false` when disabled
  σ::F       # activation function, `identity` by default
  # Inner constructor: `bias` may be `true`/`false` or an explicit array;
  # `create_bias` resolves it to a concrete value matching `size(W)`.
  function Diagonal(W::M, bias = true, σ::F = identity) where {M<:AbstractArray, F}
    b = create_bias(W, bias, size(W)...)
    return new{M, typeof(b), F}(W, b, σ)
  end
end
198196
# Size-based convenience constructor: builds the scale array with `init`
# and forwards to the array-based inner constructor.
function Diagonal(sz::Integer...; σ = identity, bias = true, init = ones32)
  return Diagonal(init(sz...), bias, σ)
end
200198
201199@functor Diagonal
202200
function (a::Diagonal)(x::AbstractArray)
  σ = NNlib.fast_act(a.σ, x)  # replaces tanh => tanh_fast, etc
  # BUG FIX: compare against the function `identity` itself, not its type.
  # `σ === typeof(identity)` is always false (a function is never its own
  # type), so the no-activation fast path was dead code and `identity` was
  # needlessly broadcast over the result.
  return σ === identity ? a.scale .* x .+ a.bias : σ.(a.scale .* x .+ a.bias)
end
204205
205206function Base. show (io:: IO , l:: Diagonal )
206207 print (io, " Diagonal(" , join (size (l. scale), " , " ))
212213 Maxout(layers...)
213214 Maxout(f, n_alts)
214215
215- This contains a number of internal layes , each of which receives the same input.
216+ This contains a number of internal layers , each of which receives the same input.
Its output is the elementwise maximum of the internal layers' outputs.
217218
218219Instead of defining layers individually, you can provide a zero-argument function
0 commit comments