linear2d: make the bias term optional

Adds an optional `biases` flag to the `linear2d` layer constructor and to
`linear2d_layer_cons`. The flag is stored in the new `use_biases` component
(set to .true. when the argument is absent). When it is .false. the layer
does not allocate `db`, skips the bias addition in `forward`, skips the `db`
accumulation in `backward`, and leaves the biases out of `get_num_params`,
`get_params`, `get_gradients` and `set_params`. The `batch_size` component of
`linear2d_layer` is removed.

Review changes relative to the patch as submitted:
  * `biases` is declared `intent(in)` in the `linear2d` interface and in its
    submodule implementation, matching `linear2d_layer_cons`.
  * `use_biases` is default-initialised to .true. in the type definition.
  * The comment in `get_num_params` no longer says "times"; the counts are
    added, and the bias count only when biases are enabled.

Note: line breaks and blank context lines were restored from the hunk line
counts. The blob hashes on the `index` lines are those of the patch as
submitted.

diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90
index ce00b6bc..cf0402de 100644
--- a/src/nf/nf_layer_constructors.f90
+++ b/src/nf/nf_layer_constructors.f90
@@ -213,12 +213,14 @@ module function reshape(output_shape) result(res)
         !! Resulting layer instance
     end function reshape
 
-    module function linear2d(out_features) result(res)
+    module function linear2d(out_features, biases) result(res)
       !! Rank-2 (sequence_length, out_features) linear layer constructor.
       !! sequence_length is determined at layer initialization, based on the
       !! output shape of the previous layer.
       integer, intent(in) :: out_features
         !! Number of output features
+      logical, optional, intent(in) :: biases
+        !! Whether to add a bias term; treated as .true. when absent
       type(layer) :: res
         !! Resulting layer instance
     end function linear2d
diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90
index 5c2e8893..19cf7a90 100644
--- a/src/nf/nf_layer_constructors_submodule.f90
+++ b/src/nf/nf_layer_constructors_submodule.f90
@@ -163,12 +163,13 @@ module function reshape(output_shape) result(res)
 
   end function reshape
 
-  module function linear2d(out_features) result(res)
+  module function linear2d(out_features, biases) result(res)
     integer, intent(in) :: out_features
+    logical, optional, intent(in) :: biases
     type(layer) :: res
 
     res % name = 'linear2d'
-    allocate(res % p, source=linear2d_layer(out_features))
+    allocate(res % p, source=linear2d_layer(out_features, biases))
 
   end function linear2d
 
diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90
index f785a14c..7a16a271 100644
--- a/src/nf/nf_linear2d_layer.f90
+++ b/src/nf/nf_linear2d_layer.f90
@@ -9,7 +9,8 @@ module nf_linear2d_layer
   public :: linear2d_layer
 
   type, extends(base_layer) :: linear2d_layer
-    integer :: sequence_length, in_features, out_features, batch_size
+    integer :: sequence_length, in_features, out_features
+    logical :: use_biases = .true.
 
     real, allocatable :: weights(:,:)
     real, allocatable :: biases(:)
@@ -31,8 +32,9 @@ module nf_linear2d_layer
   end type linear2d_layer
 
   interface linear2d_layer
-    module function linear2d_layer_cons(out_features) result(res)
+    module function linear2d_layer_cons(out_features, biases) result(res)
       integer, intent(in) :: out_features
+      logical, optional, intent(in) :: biases
       type(linear2d_layer) :: res
     end function linear2d_layer_cons
   end interface linear2d_layer
diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90
index 0dfe7e27..b38aab8d 100644
--- a/src/nf/nf_linear2d_layer_submodule.f90
+++ b/src/nf/nf_linear2d_layer_submodule.f90
@@ -5,11 +5,17 @@
 
 contains
 
-  module function linear2d_layer_cons(out_features) result(res)
+  module function linear2d_layer_cons(out_features, biases) result(res)
     integer, intent(in) :: out_features
+    logical, optional, intent(in) :: biases
     type(linear2d_layer) :: res
 
     res % out_features = out_features
+    if (present(biases)) then
+      res % use_biases = biases
+    else
+      res % use_biases = .true.
+    end if
 
   end function linear2d_layer_cons
 
@@ -36,8 +42,10 @@ module subroutine init(self, input_shape)
     allocate(self % dw(self % in_features, self % out_features))
     self % dw = 0
 
-    allocate(self % db(self % out_features))
-    self % db = 0
+    if (self % use_biases) then
+      allocate(self % db(self % out_features))
+      self % db = 0
+    end if
 
   end subroutine init
 
@@ -48,9 +56,11 @@ pure module subroutine forward(self, input)
     integer :: i
 
     self % output(:,:) = matmul(input(:,:), self % weights)
-    do concurrent(i = 1:self % sequence_length)
-      self % output(i,:) = self % output(i,:) + self % biases
-    end do
+    if (self % use_biases) then
+      do concurrent(i = 1:self % sequence_length)
+        self % output(i,:) = self % output(i,:) + self % biases
+      end do
+    end if
 
   end subroutine forward
 
@@ -64,7 +74,9 @@ pure module subroutine backward(self, input, gradient)
     integer :: i
 
     self % dw = self % dw + matmul(transpose(input(:,:)), gradient(:,:))
-    self % db = self % db + sum(gradient(:,:), 1)
+    if (self % use_biases) then
+      self % db = self % db + sum(gradient(:,:), 1)
+    end if
     self % gradient(:,:) = matmul(gradient(:,:), transpose(self % weights))
   end subroutine backward
 
@@ -74,7 +86,10 @@ pure module function get_num_params(self) result(num_params)
     integer :: num_params
 
-    ! Number of weights times number of biases
-    num_params = self % in_features * self % out_features + self % out_features
+    ! Number of weights, plus the number of biases when biases are enabled
+    num_params = self % in_features * self % out_features
+    if (self % use_biases) then
+      num_params = num_params + self % out_features
+    end if
 
   end function get_num_params
 
@@ -87,10 +102,14 @@ module function get_params(self) result(params)
 
     w_(1: product(shape(self % weights))) => self % weights
 
-    params = [ &
-      w_, &
-      self % biases &
-    ]
+    if (self % use_biases) then
+      params = [ &
+        w_, &
+        self % biases &
+      ]
+    else
+      params = w_
+    end if
 
   end function get_params
 
@@ -103,10 +122,14 @@ module function get_gradients(self) result(gradients)
 
     dw_(1: product(shape(self % dw))) => self % dw
 
-    gradients = [ &
-      dw_, &
-      self % db &
-    ]
+    if (self % use_biases) then
+      gradients = [ &
+        dw_, &
+        self % db &
+      ]
+    else
+      gradients = dw_
+    end if
 
   end function get_gradients
 
@@ -127,10 +150,12 @@ module subroutine set_params(self, params)
       p_(1:self % in_features, 1:self % out_features) => params(1 : n)
       self % weights = p_
 
-      ! reshape the biases
-      self % biases = params(n + 1 : n + self % out_features)
+      if (self % use_biases) then
+        ! reshape the biases
+        self % biases = params(n + 1 : n + self % out_features)
+      end if
     end associate
 
   end subroutine set_params
 
-end submodule nf_linear2d_layer_submodule
\ No newline at end of file
+end submodule nf_linear2d_layer_submodule