From 580191fb172eccbbd12c2dfbccbd8346f38de91e Mon Sep 17 00:00:00 2001
From: Aya Morisawa <AyaMorisawa4869@gmail.com>
Date: Sat, 22 Dec 2018 00:41:54 +0900
Subject: [PATCH] Improve MFM bracket matching

Co-authored-by: syuilo <syuilotan@yahoo.co.jp>
---
 src/mfm/parser.ts    | 42 +++++++-------------
 src/prelude/array.ts |  6 +++
 test/mfm.ts          | 95 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 115 insertions(+), 28 deletions(-)

diff --git a/src/mfm/parser.ts b/src/mfm/parser.ts
index 885b7e01cd..205d5ede1a 100644
--- a/src/mfm/parser.ts
+++ b/src/mfm/parser.ts
@@ -1,7 +1,7 @@
 import * as P from 'parsimmon';
 import parseAcct from '../misc/acct/parse';
 import { toUnicode } from 'punycode';
-import { takeWhile } from '../prelude/array';
+import { takeWhile, cumulativeSum } from '../prelude/array';
 import { Tree } from '../prelude/tree';
 import * as T from '../prelude/tree';
 
@@ -42,30 +42,18 @@ export function createTree(type: string, children: MfmForest, props: any): MfmTr
 	return T.createTree({ type, props }, children);
 }
 
-function getTrailingPosition(x: string): number {
-	const brackets = [
-		['(', ')'],
-		['「', '」'],
-	];
-	const pendingBrackets = [] as any;
-	const end = x.split('').findIndex(char => {
-		const closeMatch = brackets.map(x => x[1]).indexOf(char);
-		const openMatch = brackets.map(x => x[0]).indexOf(char);
-		if (closeMatch != -1) {
-			if (pendingBrackets[closeMatch] > 0) {
-				pendingBrackets[closeMatch]--;
-				return false;
-			} else {
-				return true;
-			}
-		} else if (openMatch != -1) {
-			pendingBrackets[openMatch] = (pendingBrackets[openMatch] || 0) + 1;
-			return false;
-		} else {
-			return false;
-		}
-	});
-	return end > 0 ? end : x.length;
+export function removeOrphanedBrackets(s: string): string { // Returns the longest prefix of s whose brackets are balanced (counted only; bracket kinds are not matched against each other).
+	const openBrackets = ['(', '「'];
+	const closeBrackets = [')', '」'];
+	const xs = cumulativeSum(s.split('').map(c => { // xs[i] = bracket nesting depth after the character at index i
+		if (openBrackets.includes(c)) return 1;
+		if (closeBrackets.includes(c)) return -1;
+		return 0;
+	}));
+	const firstOrphanedCloseBracket = xs.findIndex(x => x < 0); // negative depth: a closing bracket with no opener before it
+	if (firstOrphanedCloseBracket !== -1) return s.substr(0, firstOrphanedCloseBracket); // keep everything before that closer
+	const lastMatched = xs.lastIndexOf(0); // last index where every opened bracket has been closed (-1 if there is none)
+	return s.substr(0, lastMatched + 1); // drops a trailing unclosed part; yields '' when s starts with a never-closed opener, e.g. '(foo'
 }
 
 const newline = P((input, i) => {
@@ -220,7 +208,7 @@ const mfm = P.createLanguage({
 			const match = text.match(/^#([^\s\.,!\?#]+)/i);
 			if (!match) return P.makeFailure(i, 'not a hashtag');
 			let hashtag = match[1];
-			hashtag = hashtag.substr(0, getTrailingPosition(hashtag));
+			hashtag = removeOrphanedBrackets(hashtag);
 			if (hashtag.match(/^[0-9]+$/)) return P.makeFailure(i, 'not a hashtag');
 			if (input[i - 1] != null && input[i - 1].match(/[a-z0-9]/i)) return P.makeFailure(i, 'not a hashtag');
 			if (hashtag.length > 50) return P.makeFailure(i, 'not a hashtag');
@@ -390,7 +378,7 @@ const mfm = P.createLanguage({
 			const match = text.match(/^https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.,=\+\-]+/);
 			if (!match) return P.makeFailure(i, 'not a url');
 			let url = match[0];
-			url = url.substr(0, getTrailingPosition(url));
+			url = removeOrphanedBrackets(url);
 			if (url.endsWith('.')) url = url.substr(0, url.lastIndexOf('.'));
 			if (url.endsWith(',')) url = url.substr(0, url.lastIndexOf(','));
 			return P.makeSuccess(i + url.length, url);
diff --git a/src/prelude/array.ts b/src/prelude/array.ts
index d02de9b2e5..560dfa080d 100644
--- a/src/prelude/array.ts
+++ b/src/prelude/array.ts
@@ -109,3 +109,9 @@ export function takeWhile<T>(f: Predicate<T>, xs: T[]): T[] {
 	}
 	return ys;
 }
+
+export function cumulativeSum(xs: number[]): number[] { // Running totals: result[i] = xs[0] + ... + xs[i]; the input array is not modified.
+	const ys = Array.from(xs); // shallow copy (sufficient here: the elements are primitive numbers)
+	for (let i = 1; i < ys.length; i++) ys[i] += ys[i - 1];
+	return ys;
+}
diff --git a/test/mfm.ts b/test/mfm.ts
index 4811e1bbb2..6bbbe146ca 100644
--- a/test/mfm.ts
+++ b/test/mfm.ts
@@ -6,7 +6,7 @@ import * as assert from 'assert';
 
 import analyze from '../src/mfm/parse';
 import toHtml from '../src/mfm/html';
-import { createTree as tree, createLeaf as leaf, MfmTree } from '../src/mfm/parser';
+import { createTree as tree, createLeaf as leaf, MfmTree, removeOrphanedBrackets } from '../src/mfm/parser';
 
 function text(text: string): MfmTree {
 	return leaf('text', { text });
@@ -49,6 +49,99 @@ describe('createTree', () => {
 	});
 });
 
+describe('removeOrphanedBrackets', () => {
+	it('single (contained)', () => {
+		const input = '(foo)';
+		const expected = '(foo)';
+		const actual = removeOrphanedBrackets(input);
+		assert.deepStrictEqual(actual, expected);
+	});
+
+	it('single (head)', () => {
+		const input = '(foo)bar';
+		const expected = '(foo)bar';
+		const actual = removeOrphanedBrackets(input);
+		assert.deepStrictEqual(actual, expected);
+	});
+
+	it('single (tail)', () => {
+		const input = 'foo(bar)';
+		const expected = 'foo(bar)';
+		const actual = removeOrphanedBrackets(input);
+		assert.deepStrictEqual(actual, expected);
+	});
+
+	it('unclosed open bracket at head', () => {
+		const input = '(foo';
+		const expected = '';
+		const actual = removeOrphanedBrackets(input);
+		assert.deepStrictEqual(actual, expected);
+	});
+
+	it('orphaned close bracket at head', () => {
+		const input = ')foo';
+		const expected = '';
+		const actual = removeOrphanedBrackets(input);
+		assert.deepStrictEqual(actual, expected);
+	});
+
+	it('nested', () => {
+		const input = 'foo(「(bar)」)';
+		const expected = 'foo(「(bar)」)';
+		const actual = removeOrphanedBrackets(input);
+		assert.deepStrictEqual(actual, expected);
+	});
+
+	it('no brackets', () => {
+		const input = 'foo';
+		const expected = 'foo';
+		const actual = removeOrphanedBrackets(input);
+		assert.deepStrictEqual(actual, expected);
+	});
+
+	it('with foreign bracket (single)', () => {
+		const input = 'foo(bar))';
+		const expected = 'foo(bar)';
+		const actual = removeOrphanedBrackets(input);
+		assert.deepStrictEqual(actual, expected);
+	});
+
+	it('with foreign bracket (open)', () => {
+		const input = 'foo(bar';
+		const expected = 'foo';
+		const actual = removeOrphanedBrackets(input);
+		assert.deepStrictEqual(actual, expected);
+	});
+
+	it('with foreign bracket (close)', () => {
+		const input = 'foo)bar';
+		const expected = 'foo';
+		const actual = removeOrphanedBrackets(input);
+		assert.deepStrictEqual(actual, expected);
+	});
+
+	it('with foreign bracket (close and open)', () => {
+		const input = 'foo)(bar';
+		const expected = 'foo';
+		const actual = removeOrphanedBrackets(input);
+		assert.deepStrictEqual(actual, expected);
+	});
+
+	it('various bracket type', () => {
+		const input = 'foo「(bar)」(';
+		const expected = 'foo「(bar)」';
+		const actual = removeOrphanedBrackets(input);
+		assert.deepStrictEqual(actual, expected);
+	});
+
+	it('intersected', () => { // bracket kinds are only counted, not paired, so crossed brackets are kept as-is
+		const input = 'foo(「)」';
+		const expected = 'foo(「)」';
+		const actual = removeOrphanedBrackets(input);
+		assert.deepStrictEqual(actual, expected);
+	});
+});
+
 describe('MFM', () => {
 	it('can be analyzed', () => {
 		const tokens = analyze('@himawari @hima_sub@namori.net お腹ペコい :cat: #yryr');