Disconnection due to "esp-tls: [sock=54] select() timeout" when switching ports.

shovnik2000
Posts: 1
Joined: Wed Oct 09, 2024 3:05 pm

Disconnection due to "esp-tls: [sock=54] select() timeout" when switching ports.

Postby shovnik2000 » Wed Oct 09, 2024 3:54 pm

Hi everyone,

I'm a junior firmware developer relatively new to the ESP-IDF (I previously worked mostly with the Arduino port for ESP32 with FreeRTOS). I was recently tasked with debugging a problem in some legacy code running on already deployed devices. The firmware in these devices uses MQTT for communicating with our app and backend. The problem that we have identified so far is that these devices use port 1883 (typically used for unencrypted MQTT traffic), which seems to be blocked within the local networks of some retail stores where these devices are being showcased.

Our solution for this is to keep this port (1883) as a default and if a connection to the MQTT broker fails, try other ports (pre-defined in a list) that are more likely to be open (like 8883 for example which is used for encrypted traffic):

Code: Select all

/*
 * Candidate broker ports, listed in the order they are attempted.
 * The trailing 0 is the end-of-list sentinel the connect loop stops on.
 */
static const int ports[] = {
    1883,
    8883,
    443,
    /* some more ports to try... */
    0,
};

/*
 * Cloud bring-up task.
 *
 * After the (elided) WiFi provisioning and association steps, walks the
 * zero-terminated ports[] list and attempts an MQTT connection on each port
 * in turn, waiting up to 5000 ms per attempt. Stops at the first port that
 * connects; if none does, logs an error and jumps to _FAILED, where the task
 * deletes itself.
 *
 * pvParameters is not used in the visible part of the task.
 */
static void al_cloud_task(void* pvParameters)
{
    esp_err_t err = ESP_OK;
    const int *port = ports;

    // 1. Get WiFi credentials using BluFi
    ...
    // 2. Connect to WiFi with those credentials.
    ...

    // 3. Connect to MQTT broker once connected to local network.
    bool broker_connected = false;
    while (*port != 0) // Iterate over all ports; 0 terminates the list.
    {
        // *port is an int, so it is printed with %d (not %u).
        ESP_LOGI(TAG, "Trying port=%d", *port);
        config.port = *port;
        // NOTE(review): al_mqtt_init() runs once per attempt. Confirm that it
        // releases the client created for the previous port; otherwise the
        // earlier client keeps running alongside the new one.
        // ESP_ERROR_CHECK aborts on any error, so a failure in init/start
        // ends the whole port search rather than moving on to the next port.
        ESP_ERROR_CHECK(al_mqtt_init(&config));
        ESP_ERROR_CHECK(al_mqtt_start_connect());          // WiFi is up; start connecting to the MQTT broker
        err = al_mqtt_wait_client_connect(5000);  // Wait for the MQTT connection result
        if (err == ESP_OK) {
            ESP_LOGI(TAG, "MQTT connect success @port=%d", *port);
            broker_connected = true;
            break;
        }
        ESP_LOGE(TAG, "MQTT connect failed @port=%d", *port);
        port++; // Try next port.
    }
    if (!broker_connected) {
        ESP_LOGE(TAG, "Tried all ports, couldn't connect to MQTT broker");
        goto _FAILED;
    }

    // 4. Post connection operations + optional cleanup.
    ...
_FAILED:
    vTaskDelete(NULL);
}
Now this is where it gets a little tricky for me: some of these functions (al_mqtt_init, al_mqtt_start_connect and al_mqtt_wait_client_connect) are, as you may have noticed, not standard ESP-IDF API functions. As I understand, they're just wrappers over the IDF MQTT API functions and are defined in a separate component within the components directory:

Code: Select all

esp_err_t al_mqtt_init(const mqtt_config_t* config)
{
    AL_MQTT_PARAM_CHECK(config, return ESP_ERR_INVALID_ARG);

    if (!s_mqtt_msg) {
        s_mqtt_msg = (mqtt_msg_t*)calloc(1, sizeof(mqtt_msg_t));
        AL_MQTT_CHECK_RETURN(!s_mqtt_msg, return ESP_FAIL, "s_mqtt_msg allocation failed");
        s_mqtt_msg->config = (mqtt_config_t*)calloc(1, sizeof(mqtt_config_t));
        AL_MQTT_CHECK_GOTO(s_mqtt_msg->config, "Memory allocation failed", _FAILED);
        *s_mqtt_msg->config = *config;
    }

    const esp_mqtt_client_config_t cfg = {
#if (ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 0, 0))
        .broker.address =
            {
                .hostname  = config->host,
                .port      = config->port,
                .transport = MQTT_TRANSPORT_OVER_TCP,
            },
        .credentials =
            {
                .username                = config->username,
                .client_id               = config->client_id,
                .authentication.password = config->password,
            },
        .network.reconnect_timeout_ms = 1000,
#else
        .uri                  = config->uri,
        .port                 = config->port,
        .host                 = config->host,
        .username             = config->username,
        .password             = config->password,
        .client_id            = config->client_id,
        .reconnect_timeout_ms = 1000,
#endif
    };

    if (!s_mqtt_msg->event_group) {
        s_mqtt_msg->event_group = xEventGroupCreate();
    }
    if (!s_mqtt_msg->mutex) {
        s_mqtt_msg->mutex = xSemaphoreCreateMutex();
    }
    s_mqtt_msg->client = esp_mqtt_client_init(&cfg);
    esp_mqtt_client_register_event(s_mqtt_msg->client, ESP_EVENT_ANY_ID, _event_handler, NULL);
    return ESP_OK;

_FAILED:
    free(s_mqtt_msg);
    s_mqtt_msg = NULL;
    return ESP_FAIL;
}

Code: Select all

/**
 * @brief Start the MQTT client created by al_mqtt_init().
 *
 * Bails out with ESP_FAIL (via AL_MQTT_INIT_CHECK) when the module state is
 * missing, and returns ESP_OK without doing anything when the module already
 * considers itself connected. Otherwise forwards to esp_mqtt_client_start().
 *
 * @return ESP_OK on success or when already connected; otherwise the error
 *         reported by esp_mqtt_client_start().
 */
esp_err_t al_mqtt_start_connect(void)
{
    AL_MQTT_INIT_CHECK(s_mqtt_msg, return ESP_FAIL);
    AL_MQTT_CHECK_RETURN(s_mqtt_msg->is_connected, return ESP_OK, "MQTT is already connected");

    const esp_err_t start_status = esp_mqtt_client_start(s_mqtt_msg->client);
    AL_MQTT_CHECK_RETURN(start_status != ESP_OK,
                         return start_status,
                         "esp_mqtt_client_start() failed with error %x(%s)",
                         start_status,
                         esp_err_to_name(start_status));

    return ESP_OK;
}

Code: Select all

esp_err_t al_mqtt_wait_client_connect(uint32_t timeout)
{
    EventBits_t bits = xEventGroupWaitBits(s_mqtt_msg->event_group, MQTT_CONNECTED_EVENT | MQTT_CONNECTED_REFUSE_EVENT,
                                           false, false, pdMS_TO_TICKS(timeout));

    if (bits & MQTT_CONNECTED_EVENT) {
        s_mqtt_msg->is_connected = true;
        return ESP_OK;
    }
    if (bits & MQTT_CONNECTED_REFUSE_EVENT) {
        // esp_mqtt_client_stop(s_mqtt_msg->client);
        s_mqtt_msg->is_connected = false;
        return ESP_FAIL;
    }
    return ESP_ERR_TIMEOUT;
}
To test the port switching logic, our backend admin decided to block port 1883 on the broker itself (in a non-production environment of course) while I ran this code. What I observed is that the port switching actually works (it gives up trying to connect on port 1883 and instead tries port 8883 and even succeeds!) but a few seconds after connecting to the next port candidate, it disconnects automatically with the following error:

Code: Select all

esp-tls: [sock=54] select() timeout
and does not reconnect. Any insight into why this might be happening will be highly appreciated. Thank you.

euripedes
Posts: 7
Joined: Tue May 15, 2018 12:04 pm

Re: Disconnection due to "esp-tls: [sock=54] select() timeout" when switching ports.

Postby euripedes » Mon Oct 14, 2024 1:03 pm

Hi, could you share the logs from the mqtt client?

Who is online

Users browsing this forum: No registered users and 254 guests