CrashReporter: fix deadlocks by making it mostly async-signal-safe (#5771)

`CrashReporter::createAndSaveCrash()` is not async-signal-safe,
resulting in random deadlocks/double-crashes during Hyprland crashes.
This changes the function to be (mostly) async-signal-safe.
This commit is contained in:
virchau13 2024-04-28 00:38:48 +08:00 committed by GitHub
parent 55490637aa
commit 90a53aed59
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 351 additions and 70 deletions

View File

@ -36,6 +36,14 @@ void handleUnrecoverableSignal(int sig) {
return;
}
// Kill the program if the crash-reporter is caught in a deadlock.
signal(SIGALRM, [](int _) {
char const* msg = "\nCrashReporter exceeded timeout, forcefully exiting\n";
write(2, msg, strlen(msg));
abort();
});
alarm(15);
CrashReporter::createAndSaveCrash(sig);
abort();

View File

@ -1,59 +1,127 @@
#include "CrashReporter.hpp"
#include <random>
#include <fcntl.h>
#include <sys/utsname.h>
#include <fstream>
#include <signal.h>
#include <link.h>
#include <time.h>
#include <errno.h>
#include <sys/stat.h>
#include "../plugins/PluginSystem.hpp"
#include "../signal-safe.hpp"
#if defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__)
#include <sys/sysctl.h>
#endif
std::string getRandomMessage() {
static char const* const MESSAGES[] = {"Sorry, didn't mean to...",
"This was an accident, I swear!",
"Calm down, it was a misinput! MISINPUT!",
"Oops",
"Vaxry is going to be upset.",
"Who tried dividing by zero?!",
"Maybe you should try dusting your PC in the meantime?",
"I tried so hard, and got so far...",
"I don't feel so good...",
"*thud*",
"Well this is awkward.",
"\"stable\"",
"I hope you didn't have any unsaved progress.",
"All these computers..."};
const std::vector<std::string> MESSAGES = {"Sorry, didn't mean to...",
"This was an accident, I swear!",
"Calm down, it was a misinput! MISINPUT!",
"Oops",
"Vaxry is going to be upset.",
"Who tried dividing by zero?!",
"Maybe you should try dusting your PC in the meantime?",
"I tried so hard, and got so far...",
"I don't feel so good...",
"*thud*",
"Well this is awkward.",
"\"stable\"",
"I hope you didn't have any unsaved progress.",
"All these computers..."};
// <random> is not async-signal-safe, fake it with time(NULL) instead
char const* getRandomMessage() {
return MESSAGES[time(NULL) % (sizeof(MESSAGES) / sizeof(MESSAGES[0]))];
}
std::random_device dev;
std::mt19937 engine(dev());
std::uniform_int_distribution<> distribution(0, MESSAGES.size() - 1);
return MESSAGES[distribution(engine)];
[[noreturn]] inline void exit_with_error(char const* err) {
write(STDERR_FILENO, err, strlen(err));
// perror() is not signal-safe, but we use it here
// because if the crash-handler already crashed, it can't get any worse.
perror("");
abort();
}
void CrashReporter::createAndSaveCrash(int sig) {
int reportFd;
// get the backtrace
const int PID = getpid();
// We're in the signal handler, so we *only* have stack memory.
// To save as much stack memory as possible,
// destroy things as soon as possible.
{
MaxLengthCString<255> reportPath;
std::string finalCrashReport = "";
const auto HOME = sig_getenv("HOME");
const auto CACHE_HOME = sig_getenv("XDG_CACHE_HOME");
if (CACHE_HOME && CACHE_HOME[0] != '\0') {
reportPath += CACHE_HOME;
reportPath += "/hyprland";
} else if (HOME && HOME[0] != '\0') {
reportPath += HOME;
reportPath += "/.cache/hyprland";
} else {
exit_with_error("$CACHE_HOME and $HOME not set, nowhere to report crash\n");
return;
}
int ret = mkdir(reportPath.get_str(), S_IRWXU);
//__asm__("int $3");
if (ret < 0 && errno != EEXIST) {
exit_with_error("failed to mkdir() crash report directory\n");
}
reportPath += "/hyprlandCrashReport";
reportPath.write_num(getpid());
reportPath += ".txt";
{
BufFileWriter<64> stderr(2);
stderr += "Hyprland has crashed :( Consult the crash report at ";
if (!reportPath.boundsExceeded()) {
stderr += reportPath.get_str();
} else {
stderr += "[ERROR: Crash report path does not fit into memory! Check if your $CACHE_HOME/$HOME is too deeply nested. Max 255 characters.]";
}
stderr += " for more information.\n";
stderr.flush();
}
reportFd = open(reportPath.get_str(), O_WRONLY | O_CREAT, S_IRWXU);
if (reportFd < 0) {
exit_with_error("Failed to open crash report path for writing");
}
}
BufFileWriter<512> finalCrashReport(reportFd);
finalCrashReport += "--------------------------------------------\n Hyprland Crash Report\n--------------------------------------------\n";
finalCrashReport += getRandomMessage() + "\n\n";
finalCrashReport += getRandomMessage();
finalCrashReport += "\n\n";
finalCrashReport += std::format("Hyprland received signal {} ({})\n\n", sig, (const char*)strsignal(sig));
finalCrashReport += "Hyprland received signal ";
finalCrashReport.writeNum(sig);
finalCrashReport += '(';
finalCrashReport += sig_strsignal(sig);
finalCrashReport += ")\nVersion: ";
finalCrashReport += GIT_COMMIT_HASH;
finalCrashReport += "\nTag: ";
finalCrashReport += GIT_TAG;
finalCrashReport += "\n\n";
finalCrashReport += std::format("Version: {}\nTag: {}\n\n", GIT_COMMIT_HASH, GIT_TAG);
if (g_pPluginSystem && !g_pPluginSystem->getAllPlugins().empty()) {
if (g_pPluginSystem && g_pPluginSystem->pluginCount() > 0) {
finalCrashReport += "Hyprland seems to be running with plugins. This crash might not be Hyprland's fault.\nPlugins:\n";
for (auto& p : g_pPluginSystem->getAllPlugins()) {
finalCrashReport += std::format("\t{} ({}) {}\n", p->name, p->author, p->version);
size_t count = g_pPluginSystem->pluginCount();
CPlugin* plugins[count];
g_pPluginSystem->sig_getPlugins(plugins, count);
for (size_t i = 0; i < count; i++) {
auto p = plugins[i];
finalCrashReport += '\t';
finalCrashReport += p->name;
finalCrashReport += " (";
finalCrashReport += p->author;
finalCrashReport += ") ";
finalCrashReport += p->version;
finalCrashReport += '\n';
}
finalCrashReport += "\n\n";
@ -61,21 +129,36 @@ void CrashReporter::createAndSaveCrash(int sig) {
finalCrashReport += "System info:\n";
struct utsname unameInfo;
uname(&unameInfo);
{
struct utsname unameInfo;
uname(&unameInfo);
finalCrashReport += std::format("\tSystem name: {}\n\tNode name: {}\n\tRelease: {}\n\tVersion: {}\n\n", std::string{unameInfo.sysname}, std::string{unameInfo.nodename},
std::string{unameInfo.release}, std::string{unameInfo.version});
finalCrashReport += "\tSystem name: ";
finalCrashReport += unameInfo.sysname;
finalCrashReport += "\n\tNode name: ";
finalCrashReport += unameInfo.nodename;
finalCrashReport += "\n\tRelease: ";
finalCrashReport += unameInfo.release;
finalCrashReport += "\n\tVersion: ";
finalCrashReport += unameInfo.version;
finalCrashReport += "\n\n";
}
finalCrashReport += "GPU:\n\t";
#if defined(__DragonFly__) || defined(__FreeBSD__)
const std::string GPUINFO = execAndGet("pciconf -lv | fgrep -A4 vga");
finalCrashReport.writeCmdOutput("pciconf -lv | fgrep -A4 vga");
#else
const std::string GPUINFO = execAndGet("lspci -vnn | grep VGA");
finalCrashReport.writeCmdOutput("lspci -vnn | grep VGA");
#endif
finalCrashReport += "GPU:\n\t" + GPUINFO;
finalCrashReport += "\n\nos-release:\n";
finalCrashReport.writeCmdOutput("cat /etc/os-release | sed 's/^/\t/'");
finalCrashReport += std::format("\n\nos-release:\n\t{}\n\n\n", replaceInString(execAndGet("cat /etc/os-release"), "\n", "\n\t"));
// dladdr1()/backtrace_symbols()/this entire section allocates, and hence is NOT async-signal-safe.
// Make sure that we save the current known crash report information,
// so that if we are caught in a deadlock during a call to malloc(),
// there is still something to debug from.
finalCrashReport.flush();
finalCrashReport += "Backtrace:\n";
@ -132,7 +215,10 @@ void CrashReporter::createAndSaveCrash(int sig) {
std::stringstream ssin(ADDR2LINE);
for (size_t i = 0; i < CALLSTACK.size(); ++i) {
finalCrashReport += std::format("\t#{} | {}", i, CALLSTACK[i].desc);
finalCrashReport += "\t#";
finalCrashReport.writeNum(i);
finalCrashReport += " | ";
finalCrashReport += CALLSTACK[i].desc;
std::string functionInfo;
std::string fileLineInfo;
std::getline(ssin, functionInfo);
@ -142,32 +228,5 @@ void CrashReporter::createAndSaveCrash(int sig) {
finalCrashReport += "\n\nLog tail:\n";
finalCrashReport += Debug::rollingLog.substr(Debug::rollingLog.find("\n") + 1);
const auto HOME = getenv("HOME");
const auto CACHE_HOME = getenv("XDG_CACHE_HOME");
if (!HOME)
return;
std::ofstream ofs;
std::string reportDir;
if (!CACHE_HOME || std::string(CACHE_HOME).empty())
reportDir = std::string(HOME) + "/.cache/hyprland";
else
reportDir = std::string(CACHE_HOME) + "/hyprland";
if (!std::filesystem::exists(reportDir))
std::filesystem::create_directory(reportDir);
const auto path = reportDir + "/hyprlandCrashReport" + std::to_string(PID) + ".txt";
ofs.open(path, std::ios::trunc);
ofs << finalCrashReport;
ofs.close();
Debug::disableStdout = false;
Debug::log(CRIT, "Hyprland has crashed :( Consult the crash report at {} for more information.", path);
finalCrashReport += std::string_view(Debug::rollingLog).substr(Debug::rollingLog.find("\n") + 1);
}

View File

@ -194,3 +194,13 @@ std::vector<CPlugin*> CPluginSystem::getAllPlugins() {
results[i] = m_vLoadedPlugins[i].get();
return results;
}
size_t CPluginSystem::pluginCount() {
return m_vLoadedPlugins.size();
}
void CPluginSystem::sig_getPlugins(CPlugin** data, size_t len) {
for (size_t i = 0; i < std::min(m_vLoadedPlugins.size(), len); i++) {
data[i] = m_vLoadedPlugins[i].get();
}
}

View File

@ -37,6 +37,8 @@ class CPluginSystem {
CPlugin* getPluginByPath(const std::string& path);
CPlugin* getPluginByHandle(HANDLE handle);
std::vector<CPlugin*> getAllPlugins();
size_t pluginCount();
void sig_getPlugins(CPlugin** data, size_t len);
bool m_bAllowConfigVars = false;
std::string m_szLastError = "";

30
src/signal-safe.cpp Normal file
View File

@ -0,0 +1,30 @@
#include "signal-safe.hpp"
#ifndef __GLIBC__
#include <signal.h>
#endif
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
extern char** environ;
char const* sig_getenv(char const* name) {
int len = strlen(name);
for (char** var = environ; *var != NULL; var++) {
if (strncmp(*var, name, len) == 0 && (*var)[len] == '=') {
return (*var) + len + 1;
}
}
return NULL;
}
char const* sig_strsignal(int sig) {
#ifdef __GLIBC__
return sigabbrev_np(sig);
#elif defined(__DragonFly__) || defined(__FreeBSD__)
return sys_signame[sig];
#else
return "unknown";
#endif
}

172
src/signal-safe.hpp Normal file
View File

@ -0,0 +1,172 @@
#pragma once
#include "defines.hpp"
template <uint16_t N>
class MaxLengthCString {
public:
MaxLengthCString() : m_strPos(0), m_boundsExceeded(false) {
m_str[0] = '\0';
}
inline void operator+=(char const* rhs) {
write(rhs, strlen(rhs));
}
void write(char const* data, size_t len) {
if (m_boundsExceeded || m_strPos + len >= N) {
m_boundsExceeded = true;
return;
}
memcpy(m_str + m_strPos, data, len);
m_strPos += len;
m_str[m_strPos] = '\0';
}
void write(char c) {
if (m_boundsExceeded || m_strPos + 1 >= N) {
m_boundsExceeded = true;
return;
}
m_str[m_strPos] = c;
m_strPos++;
}
void write_num(size_t num) {
size_t d = 1;
while (num / 10 >= d)
d *= 10;
while (num > 0) {
char c = '0' + (num / d);
write(c);
num %= d;
d /= 10;
}
}
char const* get_str() {
return m_str;
};
bool boundsExceeded() {
return m_boundsExceeded;
};
private:
char m_str[N];
size_t m_strPos;
bool m_boundsExceeded;
};
template <uint16_t BUFSIZE>
class BufFileWriter {
public:
inline BufFileWriter(int fd_) : m_writeBufPos(0), m_fd(fd_) {}
~BufFileWriter() {
flush();
}
void write(char const* data, size_t len) {
while (len > 0) {
size_t to_add = std::min(len, (size_t)BUFSIZE - m_writeBufPos);
memcpy(m_writeBuf + m_writeBufPos, data, to_add);
data += to_add;
len -= to_add;
m_writeBufPos += to_add;
if (m_writeBufPos == BUFSIZE)
flush();
}
}
inline void write(char c) {
if (m_writeBufPos == BUFSIZE)
flush();
m_writeBuf[m_writeBufPos] = c;
m_writeBufPos++;
}
inline void operator+=(char const* str) {
write(str, strlen(str));
}
inline void operator+=(std::string_view str) {
write(str.data(), str.size());
}
inline void operator+=(char c) {
write(c);
}
void writeNum(size_t num) {
size_t d = 1;
while (num / 10 >= d)
d *= 10;
while (num > 0) {
char c = '0' + (num / d);
write(c);
num %= d;
d /= 10;
}
}
void writeCmdOutput(const char* cmd) {
int pipefd[2];
if (pipe(pipefd) < 0) {
*this += "<pipe(pipefd) failed with";
writeNum(errno);
*this += ">\n";
return;
}
// terminate child instead of waiting
{
struct sigaction act;
act.sa_handler = SIG_DFL;
sigemptyset(&act.sa_mask);
act.sa_flags = SA_NOCLDWAIT;
act.sa_restorer = NULL;
sigaction(SIGCHLD, &act, NULL);
}
pid_t pid = fork();
if (pid < 0) {
*this += "<fork() failed with ";
writeNum(errno);
*this += ">\n";
return;
}
if (pid == 0) {
close(pipefd[0]);
dup2(pipefd[1], STDOUT_FILENO);
char const* const argv[] = {"/bin/sh", "-c", cmd};
execv("/bin/sh", (char* const*)argv);
BufFileWriter<64> failmsg(pipefd[1]);
failmsg += "<execv(";
failmsg += cmd;
failmsg += ") resulted in errno ";
failmsg.write(errno);
failmsg += ">\n";
close(pipefd[1]);
abort();
} else {
close(pipefd[1]);
int len;
char readbuf[256];
while ((len = read(pipefd[0], readbuf, 256)) > 0) {
write(readbuf, len);
}
if (len < 0) {
*this += "<interrupted, read() resulted in errno ";
writeNum(errno);
*this += ">\n";
}
close(pipefd[0]);
}
}
void flush() {
size_t i = 0;
while (i < m_writeBufPos) {
int written = ::write(m_fd, m_writeBuf + i, m_writeBufPos - i);
if (written <= 0) {
return;
}
i += written;
}
m_writeBufPos = 0;
}
private:
char m_writeBuf[BUFSIZE];
size_t m_writeBufPos;
int m_fd;
};
char const* sig_getenv(char const* name);
char const* sig_strsignal(int sig);