I am facing an issue that occurs when connecting to a remote host over WiFi using lwIP. It occurs every few transmission cycles, seemingly at random, after a wakeup from deep sleep or a cold boot.
The error returned from connect() is -1, with an errno of 128, indicating that the socket is not connected.
After activating LWIP Debug output, the following error is given during the initial connection sequence triggered by lwip_connect:
Code: Select all
ip4_route: No route to 91.121.93.94
lwip_connect(54) failed, err=-4
The message repeats for a while and the connection attempt ultimately fails with an error code of -15 indicating the connection has been closed.
Code: Select all
tcp_slowtmr: processing active pcb
tcp_slowtmr: max SYN retries reached
tcp_pcb_purge
tcp_pcb_purge: data left on ->unacked
pbuf_free(0x3fccbc5c)
pbuf_free: deallocating 0x3fccbc5c
lwip_connect(54, addr=91.121.93.94 port=8883)
lwip_connect(54) failed, err=-15
All subsequent attempts to connect to this host result in the same error pattern until a reboot is triggered or the device is put into deep sleep.
The connection routine is employed in the following manner:
Code: Select all
/**
 * @brief Resolve the remote host of @p socket_connection and connect m_sockfd to it (IPv4/TCP).
 *
 * The host name is resolved with getaddrinfo(); connect() is then retried on
 * m_sockfd for up to 120 seconds while it reports a transient condition
 * (EINPROGRESS, EAGAIN, EHOSTUNREACH, EALREADY, EISCONN).
 *
 * @param sock_id            Socket identifier. NOTE(review): not used by this
 *                           routine, which always operates on m_sockfd - confirm
 *                           that this is intended.
 * @param socket_connection  Connection description; addr_remote and port_remote are read.
 * @return commdev::E_OK once the connection is established, commdev::E_ERR on
 *         a missing remote, a resolution failure, a connect failure or a timeout.
 *         The error()/error_extended() state is updated for resolution failures,
 *         connect failures, timeouts and on success.
 */
commdev::errlvl_t esp32s3_wifi::socket_connect(unsigned int sock_id, const commdev::socket_connection_t &socket_connection) {
    const auto &conn = socket_connection;
    if (!conn.addr_remote.size()) {
        std::stringstream ss;
        ss << "no remote given";
        LOG_FULL_ERROR(ss.str());
        return commdev::E_ERR;
    }

    // Restrict the lookup to IPv4/TCP: the result is interpreted as a
    // sockaddr_in below, so any other address family must not be returned.
    struct addrinfo hint;
    memset(&hint, 0, sizeof(hint));
    hint.ai_family = AF_INET;
    hint.ai_socktype = SOCK_STREAM;
    struct addrinfo *res = nullptr;
    const int get_res = getaddrinfo(conn.addr_remote.c_str(), std::to_string(conn.port_remote).c_str(), &hint, &res);
    // Capture errno right away; logging and freeaddrinfo() may overwrite it.
    const int resolve_errno = errno;
    if (0 != get_res || nullptr == res || nullptr == res->ai_addr) {
        std::stringstream ss;
        ss << "getaddrinfo() failed for \"" << conn.addr_remote << "\". Result: " << get_res << ", addr_info:" << res << ", errno:" << resolve_errno;
        LOG_FULL_ERROR(ss.str());
        if (res) {
            freeaddrinfo(res);
        }
        error(NL_SOCKET_ADDR_RES_ERROR);
        error_extended(resolve_errno);
        return commdev::E_ERR;
    }
    const struct in_addr addr4 = reinterpret_cast<struct sockaddr_in*>(res->ai_addr)->sin_addr;
    freeaddrinfo(res);

    // Textual form of the resolved address, used for logging only.
    char host_str[INET_ADDRSTRLEN] = { 0 };
    inet_ntop(AF_INET, &addr4, host_str, INET_ADDRSTRLEN);
    const std::string s_host_res(host_str);

    sockaddr_in addr;
    memset(&addr, 0, sizeof(sockaddr_in));
    addr.sin_family = AF_INET;
    addr.sin_port = htons((unsigned short) conn.port_remote);
    // Use the resolved binary address directly instead of re-parsing its text form.
    addr.sin_addr = addr4;
    {
        std::stringstream ss;
        ss << "Connecting to " << conn.addr_remote << " [" << s_host_res << "]@" << conn.port_remote;
        LOG_FULL_INFO(ss.str());
    }

    // NOTE(review): connect() is retried on the same descriptor after a failure.
    // POSIX leaves the socket state unspecified after a failed connect(); if the
    // retries never recover, closing and re-creating the socket (outside this
    // routine) is likely required - confirm against the lwIP version in use.
    psense::os::core::timer tim_conn(120s);
    bool connected = false;
    int last_errno = 0;
    while (!connected && !tim_conn.is_timeout()) {
        const int ret = ::connect(m_sockfd, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr));
        if (0 == ret) {
            connected = true;
            break;
        }
        // Capture errno before any other call can change it.
        const int conn_errno = errno;
        last_errno = conn_errno;

        if (EINPROGRESS == conn_errno || EAGAIN == conn_errno) {
            std::this_thread::sleep_for(100ms);
            continue;
        }
        if (EHOSTUNREACH == conn_errno) {
            LOG_FULL_DEBUG("EHOSTUNREACH");
            std::this_thread::sleep_for(100ms);
            continue;
        }
        if (EISCONN == conn_errno || EALREADY == conn_errno) {
            // Poll (zero timeout) whether the socket became writable, i.e. the
            // pending connection attempt has finished.
            struct timeval timeout;
            fd_set writing;
            FD_ZERO(&writing);
            FD_SET(m_sockfd, &writing);
            memset(&timeout, 0, sizeof(timeout));
            const int rc = select(m_sockfd + 1, nullptr, &writing, nullptr, &timeout);
            if (rc < 0) {
                /* an error occurred during the select() */
                LOG_FULL_ERROR("select error");
            } else if (rc == 0) {
                /* the socket was not ready in this poll */
                LOG_FULL_ERROR("socket not ready\n");
            } else if (FD_ISSET(m_sockfd, &writing)) {
                // Writable only means the attempt has finished, not that it
                // succeeded: SO_ERROR carries the actual outcome.
                int so_error = 0;
                socklen_t so_error_len = sizeof(so_error);
                if (0 != getsockopt(m_sockfd, SOL_SOCKET, SO_ERROR, &so_error, &so_error_len)) {
                    so_error = errno;
                }
                if (0 == so_error) {
                    LOG_FULL_INFO("Socket ready");
                    connected = true;
                    break;
                }
                std::stringstream ss;
                ss << "connect() failed. Ret = " << ret << ", errno: " << so_error;
                LOG_FULL_ERROR(ss.str());
                error(NL_SOCKET_CONNECT_ERROR);
                error_extended(so_error);
                return commdev::E_ERR;
            } else {
                LOG_FULL_ERROR("Socket not ready");
            }
            std::this_thread::sleep_for(100ms);
            continue;
        }

        // Any other errno is treated as a hard failure.
        std::stringstream ss;
        ss << "connect() failed. Ret = " << ret << ", errno: " << conn_errno;
        LOG_FULL_ERROR(ss.str());
        error(NL_SOCKET_CONNECT_ERROR);
        error_extended(conn_errno);
        return commdev::E_ERR;
    }

    // Decide on the recorded outcome rather than re-reading the timer, so a
    // connection completed just as the timer expired is not reported as a timeout.
    if (!connected) {
        LOG_FULL_ERROR("Timeout");
        error(NL_SOCKET_CONNECT_ERROR);
        error_extended(last_errno);
        return commdev::E_ERR;
    }
    LOG_FULL_INFO("Connected");
    error(NL_NO_ERROR);
    error_extended(NL_NO_ERROR);
    return commdev::E_OK;
}
What could be the cause of this behaviour and what could be done to circumvent it?