summaryrefslogtreecommitdiff
path: root/common/mastodontextutils.h
blob: bde74c49991d1277e602ae27f21012e2872c0179 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/*
 * Copyright (C) 2013-2026 Jolla Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

#ifndef MASTODONTEXTUTILS_H
#define MASTODONTEXTUTILS_H

#include <QtCore/QDateTime>
#include <QtCore/QRegularExpression>
#include <QtCore/QString>

namespace MastodonTextUtils {

// Decodes the HTML character references found in `text` and returns the result.
//
// Recognised references:
//   - named:   &quot; &apos; &lt; &gt; &amp; &nbsp; (case-sensitive;
//              &nbsp; becomes a plain space)
//   - decimal: &#NNN;
//   - hex:     &#xHHH; or &#XHHH;
//
// A numeric reference whose value is 0, above 0x10FFFF, inside the UTF-16
// surrogate range, or too large for an unsigned int is removed from the text.
// Anything that is not a recognised reference is left untouched.
//
// The text is decoded in a single left-to-right pass and decoded output is
// never rescanned, so escaped markup is only unescaped once: "&amp;lt;"
// becomes "&lt;" and "&amp;#39;" becomes "&#39;".
inline QString decodeHtmlEntities(QString text)
{
    // Group 1: entity name, group 2: decimal digits, group 3: hex digits.
    static const QRegularExpression entity(QStringLiteral("&(?:(quot|apos|lt|gt|amp|nbsp)|#([0-9]+)|#[xX]([0-9a-fA-F]+));"));

    QRegularExpressionMatch match;
    int index = 0;
    while ((index = text.indexOf(entity, index, &match)) != -1) {
        QString replacement;

        const QString name = match.captured(1);
        if (!name.isEmpty()) {
            if (name == QLatin1String("quot")) {
                replacement = QStringLiteral("\"");
            } else if (name == QLatin1String("apos")) {
                replacement = QStringLiteral("'");
            } else if (name == QLatin1String("lt")) {
                replacement = QStringLiteral("<");
            } else if (name == QLatin1String("gt")) {
                replacement = QStringLiteral(">");
            } else if (name == QLatin1String("amp")) {
                replacement = QStringLiteral("&");
            } else {
                // Only "nbsp" is left in the alternation.
                replacement = QStringLiteral(" ");
            }
        } else {
            // Exactly one of the two numeric groups took part in the match.
            const QString hexDigits = match.captured(3);
            const bool isHex = !hexDigits.isEmpty();
            const QString digits = isHex ? hexDigits : match.captured(2);

            bool ok = false;
            const uint value = digits.toUInt(&ok, isHex ? 16 : 10);
            // Surrogate code points are not valid scalar values; converting
            // them would leave a lone surrogate in the string.
            const bool isSurrogate = value >= 0xD800 && value <= 0xDFFF;
            if (ok && value > 0 && value <= 0x10FFFF && !isSurrogate) {
                replacement = QString::fromUcs4(&value, 1);
            }
        }

        text.replace(index, match.capturedLength(0), replacement);
        // Resume after the inserted text so it is never decoded a second time.
        index += replacement.size();
    }

    return text;
}

// Reduces an HTML fragment to plain text.
//
// <br> tags and closing </p> tags (any letter case, optional inner
// whitespace) become a newline, every remaining tag is removed, HTML
// entities are decoded, and leading/trailing whitespace is trimmed.
//
// Tags are stripped before entities are decoded, so text such as "&lt;b&gt;"
// ends up as the literal characters "<b>" rather than being treated as a tag.
inline QString sanitizeContent(const QString &content)
{
    // Compiled once and reused across calls instead of being rebuilt each time.
    static const QRegularExpression lineBreakTag(QStringLiteral("<\\s*br\\s*/?\\s*>"),
                                                 QRegularExpression::CaseInsensitiveOption);
    static const QRegularExpression paragraphEndTag(QStringLiteral("<\\s*/\\s*p\\s*>"),
                                                    QRegularExpression::CaseInsensitiveOption);
    // This pattern contains no letters, so case sensitivity is irrelevant.
    static const QRegularExpression anyTag(QStringLiteral("<[^>]+>"));

    QString plain = content;
    plain.replace(lineBreakTag, QStringLiteral("\n"));
    plain.replace(paragraphEndTag, QStringLiteral("\n"));
    plain.remove(anyTag);

    return decodeHtmlEntities(plain).trimmed();
}

// Parses an ISO-8601 timestamp string into a QDateTime.
//
// Attempts, in order:
//   1. Qt::ISODateWithMs (only when built against Qt 5.8 or newer),
//   2. Qt::ISODate,
//   3. Qt::ISODate again after cutting the fractional-seconds part out of
//      the string.
//
// Returns the first valid result, or the last (invalid) parse result if
// every attempt fails.
inline QDateTime parseTimestamp(const QString &timestampString)
{
#if QT_VERSION >= QT_VERSION_CHECK(5, 8, 0)
    {
        const QDateTime withMilliseconds = QDateTime::fromString(timestampString, Qt::ISODateWithMs);
        if (withMilliseconds.isValid()) {
            return withMilliseconds;
        }
    }
#endif

    const QDateTime parsed = QDateTime::fromString(timestampString, Qt::ISODate);
    if (parsed.isValid()) {
        return parsed;
    }

    // Qt 5.6 cannot parse ISO-8601 timestamps with fractional seconds, so
    // locate the fraction (a '.' somewhere after the 'T') and drop it.
    const int tPos = timestampString.indexOf(QLatin1Char('T'));
    if (tPos < 0) {
        return parsed;
    }
    const int dotPos = timestampString.indexOf(QLatin1Char('.'), tPos + 1);
    if (dotPos < 0) {
        return parsed;
    }

    // The fraction ends where the zone designator starts. The markers are
    // tried in this priority order; the first one found after the '.' wins.
    const QChar zoneMarkers[] = { QLatin1Char('Z'), QLatin1Char('+'), QLatin1Char('-') };
    int zonePos = -1;
    for (const QChar &marker : zoneMarkers) {
        zonePos = timestampString.indexOf(marker, dotPos + 1);
        if (zonePos != -1) {
            break;
        }
    }

    // Keep everything before the '.', plus the zone designator if there is one.
    QString withoutFraction = timestampString.left(dotPos);
    if (zonePos != -1) {
        withoutFraction += timestampString.mid(zonePos);
    }

    return QDateTime::fromString(withoutFraction, Qt::ISODate);
}

} // namespace MastodonTextUtils

#endif // MASTODONTEXTUTILS_H