462 lines
12 KiB
TypeScript
462 lines
12 KiB
TypeScript
/*
|
||
* SPDX-FileCopyrightText: syuilo and misskey-project
|
||
* SPDX-License-Identifier: AGPL-3.0-only
|
||
*/
|
||
|
||
import { URL } from 'node:url';
|
||
import { Inject, Injectable } from '@nestjs/common';
|
||
import * as parse5 from 'parse5';
|
||
import { Window, XMLSerializer } from 'happy-dom';
|
||
import { DI } from '@/di-symbols.js';
|
||
import type { Config } from '@/config.js';
|
||
import { intersperse } from '@/misc/prelude/array.js';
|
||
import { normalizeForSearch } from '@/misc/normalize-for-search.js';
|
||
import type { IMentionedRemoteUsers } from '@/models/Note.js';
|
||
import { bindThis } from '@/decorators.js';
|
||
import type { DefaultTreeAdapterMap } from 'parse5';
|
||
import type * as mfm from 'mfm-js';
|
||
|
||
// Shorthand for parse5's default tree adapter; used below for its node type guards
// (`isTextNode`, `isElementNode`).
const treeAdapter = parse5.defaultTreeAdapter;

// Node types of the tree produced by parse5's default tree adapter.
type Node = DefaultTreeAdapterMap['node'];
type ChildNode = DefaultTreeAdapterMap['childNode'];
|
||
|
||
// Matches an http(s) URL made only of the allowed character set at the START of a string;
// trailing characters outside that set are tolerated (prefix match).
const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/;

// Same as `urlRegex`, but the WHOLE string must consist of the allowed character set.
const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/;
|
||
|
||
/**
 * Converts between HTML fragments and MFM.
 *
 * - `fromHtml` turns an HTML fragment into MFM source text.
 * - `toHtml` turns a parsed MFM node tree into a serialized HTML string.
 */
@Injectable()
export class MfmService {
	constructor(
		@Inject(DI.config)
		private config: Config,
	) {
	}

	/**
	 * Converts an HTML fragment into MFM text.
	 *
	 * @param html HTML fragment to convert.
	 * @param hashtagNames Optional list of hashtag names. When given, an `<a>` whose
	 *   (normalized) text is in this list and which has an `href` is emitted as plain
	 *   text instead of as a link.
	 * @returns The resulting MFM text with leading/trailing whitespace trimmed.
	 */
	@bindThis
	public fromHtml(html: string, hashtagNames?: string[]): string {
		// some AP servers like Pixelfed use br tags as well as newlines
		html = html.replace(/<br\s?\/?>\r?\n/gi, '\n');

		const normalizedHashtagNames = hashtagNames == null ? undefined : new Set<string>(hashtagNames.map(x => normalizeForSearch(x)));

		const dom = parse5.parseFragment(html);

		// Output buffer; `analyze` appends to it while walking the tree.
		let text = '';

		for (const n of dom.childNodes) {
			analyze(n);
		}

		return text.trim();

		/** Returns the plain text content of a node (`<br>` becomes a newline), without any MFM markup. */
		function getText(node: Node): string {
			if (treeAdapter.isTextNode(node)) return node.value;
			if (!treeAdapter.isElementNode(node)) return '';
			if (node.nodeName === 'br') return '\n';

			if (node.childNodes) {
				return node.childNodes.map(n => getText(n)).join('');
			}

			return '';
		}

		/** Runs `analyze` on every child node, in order. */
		function appendChildren(childNodes: ChildNode[]): void {
			if (childNodes) {
				for (const n of childNodes) {
					analyze(n);
				}
			}
		}

		/** Appends the MFM representation of `node` (and its descendants) to `text`. */
		function analyze(node: Node) {
			if (treeAdapter.isTextNode(node)) {
				text += node.value;
				return;
			}

			// Skip comment or document type node
			if (!treeAdapter.isElementNode(node)) {
				return;
			}

			switch (node.nodeName) {
				case 'br': {
					text += '\n';
					break;
				}

				case 'a': {
					const txt = getText(node);
					const rel = node.attrs.find(x => x.name === 'rel');
					const href = node.attrs.find(x => x.name === 'href');

					// Hashtag
					if (normalizedHashtagNames && href && normalizedHashtagNames.has(normalizeForSearch(txt))) {
						text += txt;
					// Mention
					} else if (txt.startsWith('@') && !(rel && rel.value.startsWith('me '))) {
						const part = txt.split('@');

						if (part.length === 2 && href) {
							// The host part is omitted from the visible text, so restore it from the link target.
							// `new URL` throws on a relative or otherwise invalid href; since the HTML is
							// untrusted, fall back to the visible text instead of aborting the conversion.
							let hostname: string | null = null;
							try {
								hostname = new URL(href.value).hostname;
							} catch {
								hostname = null;
							}
							text += hostname != null ? `${txt}@${hostname}` : txt;
						} else if (part.length === 3) {
							text += txt;
						}
					// Anything else
					} else {
						const generateLink = () => {
							if (!href && !txt) {
								return '';
							}
							if (!href) {
								return txt;
							}
							if (!txt || txt === href.value) { // #6383: Missing text node
								if (href.value.match(urlRegexFull)) {
									return href.value;
								} else {
									return `<${href.value}>`;
								}
							}
							if (href.value.match(urlRegex) && !href.value.match(urlRegexFull)) {
								return `[${txt}](<${href.value}>)`; // #6846
							} else {
								return `[${txt}](${href.value})`;
							}
						};

						text += generateLink();
					}
					break;
				}

				case 'h1': {
					text += '【';
					appendChildren(node.childNodes);
					text += '】\n';
					break;
				}

				case 'b':
				case 'strong': {
					text += '**';
					appendChildren(node.childNodes);
					text += '**';
					break;
				}

				case 'small': {
					text += '<small>';
					appendChildren(node.childNodes);
					text += '</small>';
					break;
				}

				case 's':
				case 'del': {
					text += '~~';
					appendChildren(node.childNodes);
					text += '~~';
					break;
				}

				case 'i':
				case 'em': {
					text += '<i>';
					appendChildren(node.childNodes);
					text += '</i>';
					break;
				}

				// block code (<pre><code>)
				case 'pre': {
					if (node.childNodes.length === 1 && node.childNodes[0].nodeName === 'code') {
						text += '\n```\n';
						text += getText(node.childNodes[0]);
						text += '\n```\n';
					} else {
						appendChildren(node.childNodes);
					}
					break;
				}

				// inline code (<code>)
				case 'code': {
					text += '`';
					appendChildren(node.childNodes);
					text += '`';
					break;
				}

				case 'blockquote': {
					// Only the plain text of the quote is kept; every line gets a "> " prefix.
					const t = getText(node);
					if (t) {
						text += '\n> ';
						text += t.split('\n').join('\n> ');
					}
					break;
				}

				case 'p':
				case 'h2':
				case 'h3':
				case 'h4':
				case 'h5':
				case 'h6': {
					text += '\n\n';
					appendChildren(node.childNodes);
					break;
				}

				// other block elements
				case 'div':
				case 'header':
				case 'footer':
				case 'article':
				case 'li':
				case 'dt':
				case 'dd': {
					text += '\n';
					appendChildren(node.childNodes);
					break;
				}

				default: // includes inline elements
				{
					appendChildren(node.childNodes);
					break;
				}
			}
		}
	}

	/**
	 * Converts a parsed MFM node tree into an HTML string.
	 *
	 * The nodes are rendered into a `<p>` element which is then serialized.
	 *
	 * @param nodes MFM nodes to render, or `null`.
	 * @param mentionedRemoteUsers Known remote users; a mention whose username and host
	 *   match an entry links to that entry's `url` (or `uri` when `url` is falsy) instead
	 *   of a local profile URL.
	 * @returns The serialized HTML, or `null` when `nodes` is `null`.
	 */
	@bindThis
	public toHtml(nodes: mfm.MfmNode[] | null, mentionedRemoteUsers: IMentionedRemoteUsers = []): string | null {
		if (nodes == null) {
			return null;
		}

		// NOTE(review): a new happy-dom Window is created per call and never explicitly
		// disposed here — confirm against the happy-dom version in use whether it needs closing.
		const { window } = new Window();

		const doc = window.document;

		const body = doc.createElement('p');

		/** Renders each MFM child with its handler and appends the result to `targetElement`. */
		function appendChildren(children: mfm.MfmNode[], targetElement: any): void {
			if (children) {
				for (const child of children.map(x => (handlers as any)[x.type](x))) targetElement.appendChild(child);
			}
		}

		/** Fallback rendering for MFM functions without a dedicated HTML form: an `<i>` wrapping the children. */
		function fnDefault(node: mfm.MfmFn) {
			const el = doc.createElement('i');
			appendChildren(node.children, el);
			return el;
		}

		/**
		 * Builds the `<rp>(</rp><rt>reading</rt><rp>)</rp>` sequence for a ruby annotation.
		 * The `<rp>` parentheses make the ruby read like "劉備(りゅうび)" after passing
		 * through an HTML sanitizer that does not support ruby.
		 */
		function createRubyAnnotation(reading: string) {
			const rpStartEl = doc.createElement('rp');
			rpStartEl.appendChild(doc.createTextNode('('));

			const rtEl = doc.createElement('rt');
			rtEl.appendChild(doc.createTextNode(reading));

			const rpEndEl = doc.createElement('rp');
			rpEndEl.appendChild(doc.createTextNode(')'));

			return [rpStartEl, rtEl, rpEndEl];
		}

		// One renderer per MFM node type; each returns a DOM node for the given MFM node.
		const handlers: { [K in mfm.MfmNode['type']]: (node: mfm.NodeType<K>) => any } = {
			bold: (node) => {
				const el = doc.createElement('b');
				appendChildren(node.children, el);
				return el;
			},

			small: (node) => {
				const el = doc.createElement('small');
				appendChildren(node.children, el);
				return el;
			},

			strike: (node) => {
				const el = doc.createElement('del');
				appendChildren(node.children, el);
				return el;
			},

			italic: (node) => {
				const el = doc.createElement('i');
				appendChildren(node.children, el);
				return el;
			},

			fn: (node) => {
				switch (node.props.name) {
					case 'unixtime': {
						// Optional access: `children` may be empty, in which case the text is ''
						// and the invalid date below falls through to the default rendering.
						const first = node.children[0];
						const text = first?.type === 'text' ? first.props.text : '';
						try {
							const date = new Date(parseInt(text, 10) * 1000);
							const el = doc.createElement('time');
							// toISOString() throws a RangeError for an invalid date (e.g. non-numeric text).
							el.setAttribute('datetime', date.toISOString());
							el.textContent = date.toISOString();
							return el;
						} catch (err) {
							return fnDefault(node);
						}
					}

					case 'ruby': {
						if (node.children.length === 1) {
							// Single child: "<base> <reading>" packed into one text node.
							const child = node.children[0];
							const text = child.type === 'text' ? child.props.text : '';
							// Default the reading to '' so text without a space does not
							// pass `undefined` to createTextNode.
							const [base = '', reading = ''] = text.split(' ');

							const rubyEl = doc.createElement('ruby');
							rubyEl.appendChild(doc.createTextNode(base));
							for (const el of createRubyAnnotation(reading)) rubyEl.appendChild(el);
							return rubyEl;
						} else {
							// Several children: the last one is the reading, the rest is the base.
							const rt = node.children.at(-1);

							if (!rt) {
								return fnDefault(node);
							}

							const text = rt.type === 'text' ? rt.props.text : '';

							const rubyEl = doc.createElement('ruby');
							appendChildren(node.children.slice(0, node.children.length - 1), rubyEl);
							for (const el of createRubyAnnotation(text.trim())) rubyEl.appendChild(el);
							return rubyEl;
						}
					}

					default: {
						return fnDefault(node);
					}
				}
			},

			blockCode: (node) => {
				const pre = doc.createElement('pre');
				const inner = doc.createElement('code');
				inner.textContent = node.props.code;
				pre.appendChild(inner);
				return pre;
			},

			center: (node) => {
				const el = doc.createElement('div');
				appendChildren(node.children, el);
				return el;
			},

			emojiCode: (node) => {
				// The emoji shortcode is surrounded by zero-width spaces (U+200B).
				return doc.createTextNode(`\u200B:${node.props.name}:\u200B`);
			},

			unicodeEmoji: (node) => {
				return doc.createTextNode(node.props.emoji);
			},

			hashtag: (node) => {
				const a = doc.createElement('a');
				a.setAttribute('href', `${this.config.url}/tags/${node.props.hashtag}`);
				a.textContent = `#${node.props.hashtag}`;
				a.setAttribute('rel', 'tag');
				return a;
			},

			inlineCode: (node) => {
				const el = doc.createElement('code');
				el.textContent = node.props.code;
				return el;
			},

			mathInline: (node) => {
				const el = doc.createElement('code');
				el.textContent = node.props.formula;
				return el;
			},

			mathBlock: (node) => {
				const el = doc.createElement('code');
				el.textContent = node.props.formula;
				return el;
			},

			link: (node) => {
				const a = doc.createElement('a');
				a.setAttribute('href', node.props.url);
				appendChildren(node.children, a);
				return a;
			},

			mention: (node) => {
				const a = doc.createElement('a');
				const { username, host, acct } = node.props;
				const remoteUserInfo = mentionedRemoteUsers.find(remoteUser => remoteUser.username === username && remoteUser.host === host);
				// Known remote user: link to its url (or uri); otherwise link to the local profile path.
				a.setAttribute('href', remoteUserInfo ? (remoteUserInfo.url ? remoteUserInfo.url : remoteUserInfo.uri) : `${this.config.url}/${acct}`);
				a.className = 'u-url mention';
				a.textContent = acct;
				return a;
			},

			quote: (node) => {
				const el = doc.createElement('blockquote');
				appendChildren(node.children, el);
				return el;
			},

			text: (node) => {
				if (!node.props.text.match(/[\r\n]/)) {
					return doc.createTextNode(node.props.text);
				}

				// Text with line breaks becomes a <span> whose lines are separated by <br>.
				const el = doc.createElement('span');
				const nodes = node.props.text.split(/\r\n|\r|\n/).map(x => doc.createTextNode(x));

				for (const x of intersperse<FIXME | 'br'>('br', nodes)) {
					el.appendChild(x === 'br' ? doc.createElement('br') : x);
				}

				return el;
			},

			url: (node) => {
				const a = doc.createElement('a');
				a.setAttribute('href', node.props.url);
				a.textContent = node.props.url;
				return a;
			},

			search: (node) => {
				const a = doc.createElement('a');
				// Encode the query so characters such as '&', '#', '+' or spaces
				// cannot break or alter the search URL.
				a.setAttribute('href', `https://www.google.com/search?q=${encodeURIComponent(node.props.query)}`);
				a.textContent = node.props.content;
				return a;
			},

			plain: (node) => {
				const el = doc.createElement('span');
				appendChildren(node.children, el);
				return el;
			},
		};

		appendChildren(nodes, body);

		return new XMLSerializer().serializeToString(body);
	}
}
|