From 9182ffb1573c77367ad6b5e4b1f3e4f52b3c3ea4 Mon Sep 17 00:00:00 2001 From: Andrew Branson Date: Mon, 9 Mar 2026 09:43:54 +0100 Subject: Fix Mastodon sync and transfer reliability edge cases --- common/mastodontextutils.h | 122 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 common/mastodontextutils.h (limited to 'common/mastodontextutils.h') diff --git a/common/mastodontextutils.h b/common/mastodontextutils.h new file mode 100644 index 0000000..bde74c4 --- /dev/null +++ b/common/mastodontextutils.h @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2013-2026 Jolla Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef MASTODONTEXTUTILS_H +#define MASTODONTEXTUTILS_H + +#include +#include +#include + +namespace MastodonTextUtils { + +inline QString decodeHtmlEntities(QString text) +{ + text.replace(QStringLiteral("""), QStringLiteral("\"")); + text.replace(QStringLiteral("'"), QStringLiteral("'")); + text.replace(QStringLiteral("<"), QStringLiteral("<")); + text.replace(QStringLiteral(">"), QStringLiteral(">")); + text.replace(QStringLiteral("&"), QStringLiteral("&")); + text.replace(QStringLiteral(" "), QStringLiteral(" ")); + + static const QRegularExpression decimalEntity(QStringLiteral("&#(\\d+);")); + QRegularExpressionMatch match; + int index = 0; + while ((index = text.indexOf(decimalEntity, index, &match)) != -1) { + bool ok = false; + const uint value = match.captured(1).toUInt(&ok, 10); + QString replacement; + if (ok && value > 0 && value <= 0x10FFFF) { + replacement = QString::fromUcs4(&value, 1); + } + text.replace(index, match.capturedLength(0), replacement); + index += replacement.size(); + } + + static const QRegularExpression hexEntity(QStringLiteral("&#x([0-9a-fA-F]+);")); + index = 0; + while ((index = text.indexOf(hexEntity, index, &match)) != -1) { + bool ok = false; + const uint value = match.captured(1).toUInt(&ok, 16); + QString replacement; + if (ok && value > 0 && value <= 0x10FFFF) { + replacement = QString::fromUcs4(&value, 1); + } + text.replace(index, match.capturedLength(0), replacement); + index += replacement.size(); + } + + return text; +} + +inline QString sanitizeContent(const QString &content) +{ + QString plain = content; + plain.replace(QRegularExpression(QStringLiteral("<\\s*br\\s*/?\\s*>"), QRegularExpression::CaseInsensitiveOption), + QStringLiteral("\n")); + plain.replace(QRegularExpression(QStringLiteral("<\\s*/\\s*p\\s*>"), QRegularExpression::CaseInsensitiveOption), + QStringLiteral("\n")); + plain.remove(QRegularExpression(QStringLiteral("<[^>]+>"), QRegularExpression::CaseInsensitiveOption)); + + return decodeHtmlEntities(plain).trimmed(); +} + +inline QDateTime parseTimestamp(const QString ×tampString) +{ + QDateTime timestamp; + +#if QT_VERSION >= QT_VERSION_CHECK(5, 8, 0) + timestamp = QDateTime::fromString(timestampString, Qt::ISODateWithMs); + if (timestamp.isValid()) { + return timestamp; + } +#endif + + timestamp = QDateTime::fromString(timestampString, Qt::ISODate); + if (timestamp.isValid()) { + return timestamp; + } + + // Qt 5.6 cannot parse ISO-8601 timestamps with fractional seconds. + const int timeSeparator = timestampString.indexOf(QLatin1Char('T')); + const int fractionSeparator = timestampString.indexOf(QLatin1Char('.'), timeSeparator + 1); + if (timeSeparator > -1 && fractionSeparator > -1) { + int timezoneSeparator = timestampString.indexOf(QLatin1Char('Z'), fractionSeparator + 1); + if (timezoneSeparator == -1) { + timezoneSeparator = timestampString.indexOf(QLatin1Char('+'), fractionSeparator + 1); + } + if (timezoneSeparator == -1) { + timezoneSeparator = timestampString.indexOf(QLatin1Char('-'), fractionSeparator + 1); + } + + QString stripped = timestampString; + if (timezoneSeparator > -1) { + stripped.remove(fractionSeparator, timezoneSeparator - fractionSeparator); + } else { + stripped.truncate(fractionSeparator); + } + + timestamp = QDateTime::fromString(stripped, Qt::ISODate); + } + + return timestamp; +} + +} // namespace MastodonTextUtils + +#endif // MASTODONTEXTUTILS_H -- cgit v1.2.3