I'm running an AI code on an ESP32S3 board equipped with 8MB PSRAM and 16MB Flash. The code is doing the inference, and all the parameters and the input data are in separate .h files.
The AI network is made of several layers, and the code works correctly and gives the correct results up to the last layer. I mean that when I comment out the call to the function for the last layer, everything is fine. But when I uncomment it, nothing at all is printed on the serial monitor, even though the code prints a lot of memory information and text before reaching that layer.
This is why I think it is a memory problem.
Here is the function running the inference, called from the setup() function of the Arduino IDE code:
Code: Select all
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include "../include/network/rescaling.hpp"
// Layer & memory configurations
#include "../include/kernels/convolution.hpp"
#include "../layers/feature_feature_0_Conv_output_0.h"
#include "../include/kernels/activation.hpp"
#include "../layers/feature_feature_1_Relu.h"
#include "../include/kernels/pooling.hpp"
#include "../layers/feature_feature_2_MaxPool_output_0.h"
#include "../include/kernels/convolution.hpp"
#include "../layers/feature_feature_3_Conv_output_0.h"
#include "../include/kernels/activation.hpp"
#include "../layers/feature_feature_4_Relu.h"
#include "../include/kernels/pooling.hpp"
#include "../layers/feature_feature_5_MaxPool_output_0.h"
#include "../include/kernels/fullyconnected.hpp"
#include "../layers/classifier_classifier_1_Gemm_output_0.h"
#include "../include/kernels/activation.hpp"
#include "../layers/classifier_classifier_2_Relu.h"
#include "../include/kernels/fullyconnected.hpp"
#include "../layers/classifier_classifier_3_Gemm_output_0.h"
#include "../include/kernels/activation.hpp"
#include "../layers/classifier_classifier_4_Relu.h"
#include "../include/kernels/fullyconnected.hpp"
#include "../layers/output.h"
#include "../memory/mem_info.h"
#include "esp_heap_caps.h"
// Free 8-bit-capable heap used as the baseline for the "Consumed heap" traces.
// NOTE(review): hard-coded; it appears to be the "Initial heap" value the sketch
// prints at startup - confirm it still matches if the build configuration changes.
static const size_t MODEL_INITIAL_FREE_HEAP = 8555404;

// Returns the number of heap bytes consumed relative to MODEL_INITIAL_FREE_HEAP.
// The cast keeps the value compatible with the "%d" used by the trace messages.
static int model_consumed_heap()
{
    return static_cast<int>(MODEL_INITIAL_FREE_HEAP - heap_caps_get_free_size(MALLOC_CAP_8BIT));
}

// Resizes *buffer to hold nb_values floats.
// Returns false and leaves *buffer untouched (still owned by the caller) on failure.
static bool model_resize_buffer(float** buffer, size_t nb_values)
{
    float* resized = static_cast<float*>(realloc(*buffer, nb_values * sizeof(float)));
    if (resized == NULL) {
        return false;
    }
    *buffer = resized;
    return true;
}

// Turns the previous layer's output into the next layer's input: resizes *input
// to nb_values floats and copies that many floats from output into it.
// Returns false (nothing copied) if the resize fails.
static bool model_load_layer_input(float** input, const float* output, size_t nb_values)
{
    if (!model_resize_buffer(input, nb_values)) {
        return false;
    }
    memcpy(*input, output, nb_values * sizeof(float));
    return true;
}

// Reports an allocation failure and releases both layer buffers (free(NULL) is a no-op).
static void model_abort_inference(float* input, float* output)
{
    printf("model_forward: buffer allocation failed\n");
    free(input);
    free(output);
}

// Runs the network layer by layer on the data from inputs.h and copies the final
// OUTPUT_NB_OUTPUTS * OUTPUT_OUTPUTS_HEIGHT * OUTPUT_OUTPUTS_WIDTH floats to result.
//
// Two heap buffers (input/output) are resized before every layer; after each
// layer the produced output is copied into input for the next one. Both buffers
// are released before returning. If any allocation fails, an error is printed,
// the buffers are released and result is left unmodified.
//
// result: caller-provided buffer, large enough for the final output floats.
void model_forward(float* result)
{
    // NOTE(review): dim, weights and bias are not assigned in this function; the
    // parameter headers included inside each layer scope presumably declare or
    // assign them - confirm against those headers before removing anything.
    int dim;
    float* input = NULL;
    float* output = NULL;
    float* weights = NULL;
    float* bias = NULL;

    // ---- feature.0: convolution ----
    input = static_cast<float*>(malloc(FEATURE_FEATURE_0_CONV_OUTPUT_0_NB_CHANNELS * FEATURE_FEATURE_0_CONV_OUTPUT_0_CHANNELS_HEIGHT * FEATURE_FEATURE_0_CONV_OUTPUT_0_CHANNELS_WIDTH * sizeof(float)));
    output = static_cast<float*>(malloc(FEATURE_FEATURE_0_CONV_OUTPUT_0_NB_OUTPUTS * FEATURE_FEATURE_0_CONV_OUTPUT_0_OUTPUTS_HEIGHT * FEATURE_FEATURE_0_CONV_OUTPUT_0_OUTPUTS_WIDTH * sizeof(float)));
    if (input == NULL || output == NULL) {
        model_abort_inference(input, output);
        return;
    }
    printf("Input + output %d\n", 4*(FEATURE_FEATURE_0_CONV_OUTPUT_0_NB_CHANNELS * FEATURE_FEATURE_0_CONV_OUTPUT_0_CHANNELS_HEIGHT * FEATURE_FEATURE_0_CONV_OUTPUT_0_CHANNELS_WIDTH+ FEATURE_FEATURE_0_CONV_OUTPUT_0_NB_OUTPUTS * FEATURE_FEATURE_0_CONV_OUTPUT_0_OUTPUTS_HEIGHT * FEATURE_FEATURE_0_CONV_OUTPUT_0_OUTPUTS_WIDTH));
    {
        #include "../../inputs.h"
        memcpy(input, inputs, FEATURE_FEATURE_0_CONV_OUTPUT_0_NB_CHANNELS * FEATURE_FEATURE_0_CONV_OUTPUT_0_CHANNELS_HEIGHT * FEATURE_FEATURE_0_CONV_OUTPUT_0_CHANNELS_WIDTH * sizeof(float));
        #include "../parameters/feature_0_weight.h"
        #include "../parameters/feature_0_bias.h"
        convolution_forward<FEATURE_FEATURE_0_CONV_OUTPUT_0_NB_CHANNELS,
                            FEATURE_FEATURE_0_CONV_OUTPUT_0_CHANNELS_HEIGHT,
                            FEATURE_FEATURE_0_CONV_OUTPUT_0_CHANNELS_WIDTH,
                            FEATURE_FEATURE_0_CONV_OUTPUT_0_NB_OUTPUTS,
                            FEATURE_FEATURE_0_CONV_OUTPUT_0_OUTPUTS_HEIGHT,
                            FEATURE_FEATURE_0_CONV_OUTPUT_0_OUTPUTS_WIDTH,
                            FEATURE_FEATURE_0_CONV_OUTPUT_0_PADDING_Y,
                            FEATURE_FEATURE_0_CONV_OUTPUT_0_PADDING_X,
                            FEATURE_FEATURE_0_CONV_OUTPUT_0_STRIDE_Y,
                            FEATURE_FEATURE_0_CONV_OUTPUT_0_STRIDE_X,
                            FEATURE_FEATURE_0_CONV_OUTPUT_0_DILATION_Y,
                            FEATURE_FEATURE_0_CONV_OUTPUT_0_DILATION_X,
                            FEATURE_FEATURE_0_CONV_OUTPUT_0_KERNEL_HEIGHT,
                            FEATURE_FEATURE_0_CONV_OUTPUT_0_KERNEL_WIDTH,
                            FEATURE_FEATURE_0_CONV_OUTPUT_0_ACTIVATION>
                            (input, output, weights, bias, FEATURE_FEATURE_0_CONV_OUTPUT_0_RESCALING);
    }
    printf("Consumed heap: %d\n", model_consumed_heap());

    // ---- feature.1: ReLU ----
    if (!model_load_layer_input(&input, output, FEATURE_FEATURE_0_CONV_OUTPUT_0_NB_OUTPUTS * FEATURE_FEATURE_0_CONV_OUTPUT_0_OUTPUTS_HEIGHT * FEATURE_FEATURE_0_CONV_OUTPUT_0_OUTPUTS_WIDTH)) {
        model_abort_inference(input, output);
        return;
    }
    printf("Input + output %d\n", 8*FEATURE_FEATURE_0_CONV_OUTPUT_0_NB_OUTPUTS * FEATURE_FEATURE_0_CONV_OUTPUT_0_OUTPUTS_HEIGHT * FEATURE_FEATURE_0_CONV_OUTPUT_0_OUTPUTS_WIDTH);
    activation_forward<FEATURE_FEATURE_1_RELU_NB_DATA,
                       FEATURE_FEATURE_1_RELU_ACTIVATION>
                       (input, output, FEATURE_FEATURE_0_CONV_OUTPUT_0_RESCALING);
    printf("Consumed heap: %d\n", model_consumed_heap());

    // ---- feature.2: max pooling ----
    // The input must be copied out of `output` before `output` is shrunk.
    if (!model_load_layer_input(&input, output, FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_NB_CHANNELS * FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_CHANNELS_HEIGHT * FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_CHANNELS_WIDTH)
        || !model_resize_buffer(&output, FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_NB_OUTPUTS * FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_OUTPUTS_HEIGHT * FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_OUTPUTS_WIDTH)) {
        model_abort_inference(input, output);
        return;
    }
    printf("Input + output %d\n", 4*(FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_NB_CHANNELS * FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_CHANNELS_HEIGHT * FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_CHANNELS_WIDTH+ FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_NB_OUTPUTS*FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_OUTPUTS_HEIGHT*FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_OUTPUTS_WIDTH));
    pooling_forward<FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_NB_CHANNELS,
                    FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_CHANNELS_HEIGHT,
                    FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_CHANNELS_WIDTH,
                    FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_NB_OUTPUTS,
                    FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_OUTPUTS_HEIGHT,
                    FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_OUTPUTS_WIDTH,
                    FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_PADDING_Y,
                    FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_PADDING_X,
                    FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_STRIDE_Y,
                    FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_STRIDE_X,
                    FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_KERNEL_HEIGHT,
                    FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_KERNEL_WIDTH,
                    FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_POOLING_TYPE,
                    FEATURE_FEATURE_2_MAXPOOL_OUTPUT_0_ACTIVATION>
                    (input, output);
    printf("Consumed heap: %d\n", model_consumed_heap());

    // ---- feature.3: convolution ----
    if (!model_load_layer_input(&input, output, FEATURE_FEATURE_3_CONV_OUTPUT_0_NB_CHANNELS * FEATURE_FEATURE_3_CONV_OUTPUT_0_CHANNELS_HEIGHT * FEATURE_FEATURE_3_CONV_OUTPUT_0_CHANNELS_WIDTH)
        || !model_resize_buffer(&output, FEATURE_FEATURE_3_CONV_OUTPUT_0_NB_OUTPUTS * FEATURE_FEATURE_3_CONV_OUTPUT_0_OUTPUTS_HEIGHT * FEATURE_FEATURE_3_CONV_OUTPUT_0_OUTPUTS_WIDTH)) {
        model_abort_inference(input, output);
        return;
    }
    printf("Input + output %d\n", 4*(FEATURE_FEATURE_3_CONV_OUTPUT_0_NB_CHANNELS * FEATURE_FEATURE_3_CONV_OUTPUT_0_CHANNELS_HEIGHT * FEATURE_FEATURE_3_CONV_OUTPUT_0_CHANNELS_WIDTH+ FEATURE_FEATURE_3_CONV_OUTPUT_0_NB_OUTPUTS * FEATURE_FEATURE_3_CONV_OUTPUT_0_OUTPUTS_HEIGHT * FEATURE_FEATURE_3_CONV_OUTPUT_0_OUTPUTS_WIDTH));
    {
        #include "../parameters/feature_3_weight.h"
        #include "../parameters/feature_3_bias.h"
        convolution_forward<FEATURE_FEATURE_3_CONV_OUTPUT_0_NB_CHANNELS,
                            FEATURE_FEATURE_3_CONV_OUTPUT_0_CHANNELS_HEIGHT,
                            FEATURE_FEATURE_3_CONV_OUTPUT_0_CHANNELS_WIDTH,
                            FEATURE_FEATURE_3_CONV_OUTPUT_0_NB_OUTPUTS,
                            FEATURE_FEATURE_3_CONV_OUTPUT_0_OUTPUTS_HEIGHT,
                            FEATURE_FEATURE_3_CONV_OUTPUT_0_OUTPUTS_WIDTH,
                            FEATURE_FEATURE_3_CONV_OUTPUT_0_PADDING_Y,
                            FEATURE_FEATURE_3_CONV_OUTPUT_0_PADDING_X,
                            FEATURE_FEATURE_3_CONV_OUTPUT_0_STRIDE_Y,
                            FEATURE_FEATURE_3_CONV_OUTPUT_0_STRIDE_X,
                            FEATURE_FEATURE_3_CONV_OUTPUT_0_DILATION_Y,
                            FEATURE_FEATURE_3_CONV_OUTPUT_0_DILATION_X,
                            FEATURE_FEATURE_3_CONV_OUTPUT_0_KERNEL_HEIGHT,
                            FEATURE_FEATURE_3_CONV_OUTPUT_0_KERNEL_WIDTH,
                            FEATURE_FEATURE_3_CONV_OUTPUT_0_ACTIVATION>
                            (input, output, weights, bias, FEATURE_FEATURE_3_CONV_OUTPUT_0_RESCALING);
    }
    printf("Consumed heap: %d\n", model_consumed_heap());

    // ---- feature.4: ReLU ----
    if (!model_load_layer_input(&input, output, FEATURE_FEATURE_3_CONV_OUTPUT_0_NB_OUTPUTS * FEATURE_FEATURE_3_CONV_OUTPUT_0_OUTPUTS_HEIGHT * FEATURE_FEATURE_3_CONV_OUTPUT_0_OUTPUTS_WIDTH)) {
        model_abort_inference(input, output);
        return;
    }
    printf("Input + output %d\n", 8*FEATURE_FEATURE_3_CONV_OUTPUT_0_NB_OUTPUTS * FEATURE_FEATURE_3_CONV_OUTPUT_0_OUTPUTS_HEIGHT * FEATURE_FEATURE_3_CONV_OUTPUT_0_OUTPUTS_WIDTH);
    activation_forward<FEATURE_FEATURE_4_RELU_NB_DATA,
                       FEATURE_FEATURE_4_RELU_ACTIVATION>
                       (input, output, FEATURE_FEATURE_3_CONV_OUTPUT_0_RESCALING);
    printf("Consumed heap: %d\n", model_consumed_heap());

    // ---- feature.5: max pooling ----
    if (!model_load_layer_input(&input, output, FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_NB_CHANNELS * FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_CHANNELS_HEIGHT * FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_CHANNELS_WIDTH)
        || !model_resize_buffer(&output, FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_NB_OUTPUTS * FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_OUTPUTS_HEIGHT * FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_OUTPUTS_WIDTH)) {
        model_abort_inference(input, output);
        return;
    }
    printf("Input + output %d\n", 4*(FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_NB_CHANNELS * FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_CHANNELS_HEIGHT * FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_CHANNELS_WIDTH+ FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_NB_OUTPUTS * FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_OUTPUTS_HEIGHT * FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_OUTPUTS_WIDTH));
    pooling_forward<FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_NB_CHANNELS,
                    FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_CHANNELS_HEIGHT,
                    FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_CHANNELS_WIDTH,
                    FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_NB_OUTPUTS,
                    FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_OUTPUTS_HEIGHT,
                    FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_OUTPUTS_WIDTH,
                    FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_PADDING_Y,
                    FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_PADDING_X,
                    FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_STRIDE_Y,
                    FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_STRIDE_X,
                    FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_KERNEL_HEIGHT,
                    FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_KERNEL_WIDTH,
                    FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_POOLING_TYPE,
                    FEATURE_FEATURE_5_MAXPOOL_OUTPUT_0_ACTIVATION>
                    (input, output);
    printf("Consumed heap: %d\n", model_consumed_heap());

    // ---- classifier.1: fully connected ----
    if (!model_load_layer_input(&input, output, CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_NB_CHANNELS * CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_CHANNELS_HEIGHT * CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_CHANNELS_WIDTH)
        || !model_resize_buffer(&output, CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_NB_OUTPUTS * CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_OUTPUTS_HEIGHT * CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_OUTPUTS_WIDTH)) {
        model_abort_inference(input, output);
        return;
    }
    printf("Input + output %d\n", 4*(CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_NB_CHANNELS * CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_CHANNELS_HEIGHT * CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_CHANNELS_WIDTH+ CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_NB_OUTPUTS * CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_OUTPUTS_HEIGHT * CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_OUTPUTS_WIDTH));
    {
        #include "../parameters/classifier_1_weight.h"
        #include "../parameters/classifier_1_bias.h"
        fullyconnected_forward<CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_NB_CHANNELS,
                               CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_CHANNELS_HEIGHT,
                               CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_CHANNELS_WIDTH,
                               CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_NB_OUTPUTS,
                               CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_OUTPUTS_HEIGHT,
                               CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_OUTPUTS_WIDTH,
                               CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_ACTIVATION>
                               (input, output, weights, bias, CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_RESCALING);
        // Extra trace taken while the layer's parameter scope is still open.
        printf("Consumed heap FC: %d\n", model_consumed_heap());
    }
    printf("Consumed heap: %d\n", model_consumed_heap());

    // ---- classifier.2: ReLU ----
    if (!model_load_layer_input(&input, output, CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_NB_OUTPUTS * CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_OUTPUTS_HEIGHT * CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_OUTPUTS_WIDTH)) {
        model_abort_inference(input, output);
        return;
    }
    printf("Input + output %d\n", 8*CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_NB_OUTPUTS * CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_OUTPUTS_HEIGHT * CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_OUTPUTS_WIDTH);
    activation_forward<CLASSIFIER_CLASSIFIER_2_RELU_NB_DATA,
                       CLASSIFIER_CLASSIFIER_2_RELU_ACTIVATION>
                       (input, output, CLASSIFIER_CLASSIFIER_1_GEMM_OUTPUT_0_RESCALING);
    printf("Consumed heap: %d\n", model_consumed_heap());

    // ---- classifier.3: fully connected ----
    if (!model_load_layer_input(&input, output, CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_NB_CHANNELS * CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_CHANNELS_HEIGHT * CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_CHANNELS_WIDTH)
        || !model_resize_buffer(&output, CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_NB_OUTPUTS * CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_OUTPUTS_HEIGHT * CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_OUTPUTS_WIDTH)) {
        model_abort_inference(input, output);
        return;
    }
    printf("Input + output %d\n", 4*(CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_NB_CHANNELS * CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_CHANNELS_HEIGHT * CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_CHANNELS_WIDTH+ CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_NB_OUTPUTS * CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_OUTPUTS_HEIGHT * CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_OUTPUTS_WIDTH));
    {
        #include "../parameters/classifier_3_weight.h"
        #include "../parameters/classifier_3_bias.h"
        fullyconnected_forward<CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_NB_CHANNELS,
                               CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_CHANNELS_HEIGHT,
                               CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_CHANNELS_WIDTH,
                               CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_NB_OUTPUTS,
                               CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_OUTPUTS_HEIGHT,
                               CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_OUTPUTS_WIDTH,
                               CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_ACTIVATION>
                               (input, output, weights, bias, CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_RESCALING);
    }
    printf("Consumed heap: %d\n", model_consumed_heap());

    // ---- classifier.4: ReLU ----
    if (!model_load_layer_input(&input, output, CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_NB_OUTPUTS * CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_OUTPUTS_HEIGHT * CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_OUTPUTS_WIDTH)) {
        model_abort_inference(input, output);
        return;
    }
    printf("Input + output %d\n", 8*CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_NB_OUTPUTS * CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_OUTPUTS_HEIGHT * CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_OUTPUTS_WIDTH);
    activation_forward<CLASSIFIER_CLASSIFIER_4_RELU_NB_DATA,
                       CLASSIFIER_CLASSIFIER_4_RELU_ACTIVATION>
                       (input, output, CLASSIFIER_CLASSIFIER_3_GEMM_OUTPUT_0_RESCALING);
    printf("Consumed heap: %d\n", model_consumed_heap());

    // ---- output: fully connected (call currently disabled) ----
    if (!model_load_layer_input(&input, output, OUTPUT_NB_CHANNELS * OUTPUT_CHANNELS_HEIGHT * OUTPUT_CHANNELS_WIDTH)
        || !model_resize_buffer(&output, OUTPUT_NB_OUTPUTS * OUTPUT_OUTPUTS_HEIGHT * OUTPUT_OUTPUTS_WIDTH)) {
        model_abort_inference(input, output);
        return;
    }
    printf("Input + output %d\n", 4*(OUTPUT_NB_CHANNELS * OUTPUT_CHANNELS_HEIGHT * OUTPUT_CHANNELS_WIDTH+ OUTPUT_NB_OUTPUTS * OUTPUT_OUTPUTS_HEIGHT * OUTPUT_OUTPUTS_WIDTH));
    printf("Consumed heap: %d\n", model_consumed_heap());
    {
        #include "../parameters/classifier_5_weight.h"
        #include "../parameters/classifier_5_bias.h"
        // NOTE(review): the final layer is left disabled exactly as in the
        // original code. While it is disabled, `output` still holds the leading
        // values of the previous ReLU output (kept by the shrinking realloc),
        // and those are what get copied to `result` below.
        /*
        fullyconnected_forward<OUTPUT_NB_CHANNELS,
        OUTPUT_CHANNELS_HEIGHT,
        OUTPUT_CHANNELS_WIDTH,
        OUTPUT_NB_OUTPUTS,
        OUTPUT_OUTPUTS_HEIGHT,
        OUTPUT_OUTPUTS_WIDTH,
        OUTPUT_ACTIVATION>
        (input, output, weights, bias, OUTPUT_RESCALING);
        */
    }

    // Hand the final values to the caller, then release both layer buffers so
    // repeated calls do not leak heap.
    memcpy(result, output, OUTPUT_NB_OUTPUTS * OUTPUT_OUTPUTS_HEIGHT * OUTPUT_OUTPUTS_WIDTH * sizeof(float));
    free(input);
    free(output);
}
/*
Layers statistics:
convolution: Input: 784, Output: 4704, Weights: 150, Biases: 6, Total: 5644, RAM: 22576 B
activation: Input: 4704, Output: 4704, Total: 9408, RAM: 37632 B
pooling: Input: 4704, Output: 1176, Total: 5880, RAM: 23520 B
convolution: Input: 1176, Output: 1600, Weights: 2400, Biases: 16, Total: 5192, RAM: 20768 B
activation: Input: 1600, Output: 1600, Total: 3200, RAM: 12800 B
pooling: Input: 1600, Output: 400, Total: 2000, RAM: 8000 B
fullyconnected: Input: 400, Output: 120, Weights: 48000, Biases: 120, Total: 48640, RAM: 194560 B
activation: Input: 120, Output: 120, Total: 240, RAM: 960 B
fullyconnected: Input: 120, Output: 84, Weights: 10080, Biases: 84, Total: 10368, RAM: 41472 B
activation: Input: 84, Output: 84, Total: 168, RAM: 672 B
fullyconnected: Input: 84, Output: 10, Weights: 840, Biases: 10, Total: 944, RAM: 3776 B
Maximum RAM required: 194560 B (191 kB)
*/
Here is the output when the last layer is commented out:
And there is nothing on the monitor when it is uncommented!!
Executing CPP export
Initial heap: 8555404
Input + output 21952
Consumed heap: 22184
Input + output 37632
Consumed heap: 37864
Input + output 23520
Consumed heap: 23752
Input + output 11104
Consumed heap: 11336
Input + output 12800
Consumed heap: 13032
Input + output 8000
Consumed heap: 8232
Input + output 2080
Consumed heap FC: 2312
Consumed heap: 2312
Input + output 960
Consumed heap: 1192
Input + output 816
Consumed heap: 1048
Input + output 672
Consumed heap: 904
Input + output 376
Consumed heap: 608
0: 0
1: 8.55611
2: 0
3: 0
4: 0
5: 0
6: 0
7: 2.1693
8: 0
9: 0
Execution time: 57 ms
When I print the free heap using
Code: Select all
heap_caps_get_free_size(MALLOC_CAP_8BIT)
Can anyone help or provide some advice to find the source of the problem?
Thanks a lot.