refactor (backend): port extract-hashtags to backend-rs

This commit is contained in:
naskya 2024-07-21 09:17:13 +09:00
parent edc69f37b5
commit 332b6cd620
No known key found for this signature in database
GPG key ID: 712D413B3A9FED5C
16 changed files with 108 additions and 32 deletions

23
Cargo.lock generated
View file

@ -211,6 +211,7 @@ dependencies = [
"image",
"isahc",
"macros",
"mfm",
"napi",
"napi-build",
"napi-derive",
@ -1806,6 +1807,18 @@ version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "mfm"
version = "0.1.0"
source = "git+https://firefish.dev/firefish/mfm.git?rev=1dc2cc408176e6559db08cb568e63236597341b4#1dc2cc408176e6559db08cb568e63236597341b4"
dependencies = [
"emojis",
"nom",
"nom-regex",
"once_cell",
"unicode-segmentation",
]
[[package]]
name = "mime"
version = "0.3.17"
@ -1928,6 +1941,16 @@ dependencies = [
"thiserror",
]
[[package]]
name = "nom-regex"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72e5c7731c4c1370b61604ed52a2475e861aac9e08dec9f23903d4ddfdc91c18"
dependencies = [
"nom",
"regex",
]
[[package]]
name = "noop_proc_macro"
version = "0.3.0"

View file

@ -26,6 +26,7 @@ emojis = { version = "0.6.2", default-features = false }
idna = { version = "1.0.2", default-features = false }
image = { version = "0.25.1", default-features = false }
isahc = { version = "1.7.2", default-features = false }
mfm = { git = "https://firefish.dev/firefish/mfm.git", rev = "1dc2cc408176e6559db08cb568e63236597341b4" }
nom-exif = { version = "1.2.6", default-features = false }
once_cell = { version = "1.19.0", default-features = false }
pretty_assertions = { version = "1.4.0", default-features = false }

View file

@ -30,6 +30,7 @@ emojis = { workspace = true }
idna = { workspace = true, features = ["std", "compiled_data"] }
image = { workspace = true, features = ["avif", "bmp", "gif", "ico", "jpeg", "png", "tiff", "webp"] }
isahc = { workspace = true, features = ["http2", "text-decoding", "json"] }
mfm = { workspace = true }
nom-exif = { workspace = true }
once_cell = { workspace = true }
openssl = { workspace = true, features = ["vendored"] }

View file

@ -371,6 +371,8 @@ export interface Emoji {
height: number | null
}
export declare function extractHashtags(text: string): Array<string>
export declare function extractHost(uri: string): string
export declare function fetchMeta(): Promise<Meta>

View file

@ -375,6 +375,7 @@ module.exports.decodeReaction = nativeBinding.decodeReaction
module.exports.DriveFileEvent = nativeBinding.DriveFileEvent
module.exports.DriveFileUsageHint = nativeBinding.DriveFileUsageHint
module.exports.DriveFolderEvent = nativeBinding.DriveFolderEvent
module.exports.extractHashtags = nativeBinding.extractHashtags
module.exports.extractHost = nativeBinding.extractHost
module.exports.fetchMeta = nativeBinding.fetchMeta
module.exports.fetchNodeinfo = nativeBinding.fetchNodeinfo

View file

@ -4,6 +4,7 @@ pub mod config;
pub mod database;
pub mod federation;
pub mod init;
pub mod mfm;
pub mod misc;
pub mod model;
pub mod service;

View file

@ -0,0 +1,14 @@
use mfm::{node, parse, MfmParseError};
/// Extracts the distinct hashtags found in `text`.
///
/// `text` is parsed as MFM and every hashtag node yielded by the parser is
/// collected. Each hashtag appears once in the result, in the order of its
/// first occurrence in the parsed output, so the result is deterministic for
/// a given input. This matters to callers that truncate the list (e.g. keep
/// only the first N tags).
///
/// NOTE(review): only the nodes returned directly by `parse` are inspected.
/// If the parser can nest a hashtag inside another node, such hashtags are
/// not picked up here -- confirm against the `mfm` crate.
///
/// # Errors
///
/// Returns [`MfmParseError`] if `text` cannot be parsed.
#[macros::export]
pub fn extract_hashtags(text: &str) -> Result<Vec<String>, MfmParseError> {
    let mut seen = std::collections::HashSet::new();
    let mut hashtags = Vec::new();

    for parsed in parse(text)? {
        if let node::Node::Inline(node::Inline::Hashtag(node::Hashtag { hashtag })) = parsed {
            // `insert` returns `false` for a value that is already present,
            // so repeats are dropped while the first occurrence keeps its place.
            if seen.insert(hashtag.clone()) {
                hashtags.push(hashtag);
            }
        }
    }

    Ok(hashtags)
}

View file

@ -0,0 +1 @@
pub mod hashtag;

View file

@ -1,9 +0,0 @@
import * as mfm from "mfm-js";
import { unique } from "@/prelude/array.js";
/**
 * Collects the hashtags contained in a parsed MFM tree.
 *
 * @param nodes - MFM nodes to search via `mfm.extract`.
 * @returns The `hashtag` prop of every node whose type is "hashtag",
 * passed through `unique`.
 */
export function extractHashtags(nodes: mfm.MfmNode[]): string[] {
	return unique(
		mfm
			.extract(nodes, (candidate) => candidate.type === "hashtag")
			.map((tagNode) => tagNode.props.hashtag),
	);
}

View file

@ -8,13 +8,14 @@ import { resolveImage } from "./image.js";
import type { CacheableRemoteUser } from "@/models/entities/user.js";
import { htmlToMfm } from "../misc/html-to-mfm.js";
import { extractApHashtags } from "./tag.js";
import { unique, toArray, toSingle } from "@/prelude/array.js";
import { unique, toArray, toSingle, concat } from "@/prelude/array.js";
import { extractPollFromQuestion } from "./question.js";
import vote from "@/services/note/polls/vote.js";
import { apLogger } from "../logger.js";
import type { DriveFile } from "@/models/entities/drive-file.js";
import {
type ImageSize,
extractHashtags,
extractHost,
genId,
getImageSizeFromUrl,
@ -48,7 +49,6 @@ import { extractApMentions } from "./mention.js";
import DbResolver from "../db-resolver.js";
import { StatusError } from "@/misc/fetch.js";
import { publishNoteStream } from "@/services/stream.js";
import { extractHashtags } from "@/misc/extract-hashtags.js";
import { UserProfiles } from "@/models/index.js";
import { In } from "typeorm";
import { config } from "@/config.js";
@ -663,19 +663,36 @@ export async function updateNote(value: string | IObject, resolver?: Resolver) {
const apMentions = await extractApMentions(post.tag);
const apHashtags = await extractApHashtags(post.tag);
let hashTags: string[];
const poll = await extractPollFromQuestion(post, resolver).catch(
() => undefined,
);
const choices = poll?.choices.flatMap((choice) => mfm.parse(choice)) ?? [];
if (apHashtags) {
hashTags = apHashtags;
} else {
hashTags = unique(
(text ? extractHashtags(text) : [])
.concat(cw ? extractHashtags(cw) : [])
.concat(
poll?.choices
? concat(
poll.choices.map((choice: string) => extractHashtags(choice)),
)
: [],
),
);
}
const choices =
poll?.choices.flatMap((choice: string) => mfm.parse(choice)) ?? [];
const tokens = mfm
.parse(text || "")
.concat(mfm.parse(cw || ""))
.concat(choices);
const hashTags: string[] = apHashtags || extractHashtags(tokens);
const mentionUsers =
apMentions || (await extractMentionedUsers(actor, tokens));

View file

@ -3,7 +3,7 @@ import { publishMainStream, publishUserEvent } from "@/services/stream.js";
import acceptAllFollowRequests from "@/services/following/requests/accept-all.js";
import { publishToFollowers } from "@/services/i/update.js";
import { extractCustomEmojisFromMfm } from "@/misc/extract-custom-emojis-from-mfm.js";
import { extractHashtags } from "@/misc/extract-hashtags.js";
import { extractHashtags } from "backend-rs";
import { updateUsertags } from "@/services/update-hashtag.js";
import { Users, DriveFiles, UserProfiles, Pages } from "@/models/index.js";
import type { User } from "@/models/entities/user.js";
@ -309,7 +309,7 @@ export default define(meta, paramDef, async (ps, _user, token) => {
if (newDescription != null) {
const tokens = mfm.parse(newDescription);
emojis = emojis.concat(extractCustomEmojisFromMfm(tokens!));
tags = extractHashtags(tokens!)
tags = extractHashtags(newDescription)
.map((tag) => normalizeForSearch(tag))
.splice(0, 32);
}

View file

@ -22,8 +22,8 @@ import { genId, HOUR } from "backend-rs";
import { getNote } from "@/server/api/common/getters.js";
import { Poll } from "@/models/entities/poll.js";
import * as mfm from "mfm-js";
import { concat } from "@/prelude/array.js";
import { extractHashtags } from "@/misc/extract-hashtags.js";
import { concat, unique } from "@/prelude/array.js";
import { extractHashtags } from "backend-rs";
import { extractCustomEmojisFromMfm } from "@/misc/extract-custom-emojis-from-mfm.js";
import { extractMentionedUsers } from "@/services/note/create.js";
import { publishNoteStream } from "@/services/stream.js";
@ -399,7 +399,15 @@ export default define(meta, paramDef, async (ps, user) => {
const combinedTokens = tokens.concat(cwTokens).concat(choiceTokens);
tags = extractHashtags(combinedTokens);
tags = unique(
(ps.text ? extractHashtags(ps.text) : [])
.concat(ps.cw ? extractHashtags(ps.cw) : [])
.concat(
ps.poll?.choices
? concat(ps.poll.choices.map((choice) => extractHashtags(choice)))
: [],
),
);
emojis = extractCustomEmojisFromMfm(combinedTokens);

View file

@ -44,9 +44,5 @@ export default define(meta, paramDef, async (ps, user) => {
return 204;
}
return translate(
note.text,
note.lang as string | null,
ps.targetLang,
);
return translate(note.text, note.lang as string | null, ps.targetLang);
});

View file

@ -8,7 +8,7 @@ import { deliverToRelays } from "@/services/relay.js";
import type { UserProfile } from "@/models/entities/user-profile.js";
import { extractCustomEmojisFromMfm } from "@/misc/extract-custom-emojis-from-mfm.js";
import mfm from "mfm-js";
import { extractHashtags } from "@/misc/extract-hashtags.js";
import { extractHashtags } from "backend-rs";
import { normalizeForSearch } from "@/misc/normalize-for-search.js";
import { updateUsertags } from "@/services/update-hashtag.js";
import { publishMainStream, publishUserEvent } from "@/services/stream.js";
@ -56,7 +56,7 @@ export async function updateUserProfileData(
if (newDescription != null) {
const tokens = mfm.parse(newDescription);
emojis = emojis.concat(extractCustomEmojisFromMfm(tokens!));
tags = extractHashtags(tokens!)
tags = extractHashtags(newDescription)
.map((tag) => normalizeForSearch(tag))
.splice(0, 32);
}

View file

@ -8,12 +8,11 @@ import { renderActivity } from "@/remote/activitypub/renderer/index.js";
import { resolveUser } from "@/remote/resolve-user.js";
import { config } from "@/config.js";
import { updateHashtags } from "@/services/update-hashtag.js";
import { concat } from "@/prelude/array.js";
import { concat, unique } from "@/prelude/array.js";
import { insertNoteUnread } from "@/services/note/unread.js";
import { registerOrFetchInstanceDoc } from "@/services/register-or-fetch-instance-doc.js";
import { extractMentions } from "@/misc/extract-mentions.js";
import { extractCustomEmojisFromMfm } from "@/misc/extract-custom-emojis-from-mfm.js";
import { extractHashtags } from "@/misc/extract-hashtags.js";
import type { IMentionedRemoteUsers } from "@/models/entities/note.js";
import { Note } from "@/models/entities/note.js";
import {
@ -39,6 +38,7 @@ import { Poll } from "@/models/entities/poll.js";
import { createNotification } from "@/services/create-notification.js";
import { isDuplicateKeyValueError } from "@/misc/is-duplicate-key-value-error.js";
import {
extractHashtags,
updateAntennasOnNewNote,
checkWordMute,
genId,
@ -291,7 +291,21 @@ export default async (
const combinedTokens = tokens.concat(cwTokens).concat(choiceTokens);
tags = data.apHashtags || extractHashtags(combinedTokens);
tags =
data.apHashtags ||
unique(
(data.text ? extractHashtags(data.text) : [])
.concat(data.cw ? extractHashtags(data.cw) : [])
.concat(
data.poll?.choices
? concat(
data.poll.choices.map((choice: string) =>
extractHashtags(choice),
),
)
: [],
),
);
emojis = data.apEmojis || extractCustomEmojisFromMfm(combinedTokens);

View file

@ -7,7 +7,6 @@ import DeliverManager from "@/remote/activitypub/deliver-manager.js";
import renderNote from "@/remote/activitypub/renderer/note.js";
import { renderActivity } from "@/remote/activitypub/renderer/index.js";
import { extractCustomEmojisFromMfm } from "@/misc/extract-custom-emojis-from-mfm.js";
import { extractHashtags } from "@/misc/extract-hashtags.js";
import type { IMentionedRemoteUsers } from "@/models/entities/note.js";
import type { Note } from "@/models/entities/note.js";
import {
@ -21,12 +20,13 @@ import {
import type { DriveFile } from "@/models/entities/drive-file.js";
import { In } from "typeorm";
import type { ILocalUser, IRemoteUser } from "@/models/entities/user.js";
import { genId } from "backend-rs";
import { extractHashtags, genId } from "backend-rs";
import type { IPoll } from "@/models/entities/poll.js";
import { deliverToRelays } from "../relay.js";
import renderUpdate from "@/remote/activitypub/renderer/update.js";
import { extractMentionedUsers } from "@/services/note/create.js";
import { normalizeForSearch } from "@/misc/normalize-for-search.js";
import { unique } from "@/prelude/array.js";
type Option = {
text?: string | null;
@ -51,7 +51,13 @@ export default async function (
const tokens = mfm.parse(data.text || "").concat(mfm.parse(data.cw || ""));
const tags: string[] = extractHashtags(tokens)
const extractedTags = unique(
(data.text ? extractHashtags(data.text) : []).concat(
data.cw ? extractHashtags(data.cw) : [],
),
);
const tags: string[] = extractedTags
.filter((tag) => Array.from(tag || "").length <= 128)
.splice(0, 32)
.map(normalizeForSearch);