|
// retoor <retoor@molodetz.nl>
|
|
|
|
import Foundation
|
|
|
|
/// A single named spam rule: an optional case-insensitive regular expression
/// plus a list of substring keywords. The rule matches when either the regex
/// or any keyword hits.
struct SpamPattern: Sendable {
    /// Identifier reported for this rule when it matches.
    let name: String
    /// Compiled case-insensitive regex, or `nil` when no pattern was supplied
    /// or the supplied pattern failed to compile.
    let regex: NSRegularExpression?
    /// Keywords, stored lowercased so they can be compared against lowercased text.
    let keywords: [String]
    /// Score associated with this rule (`SpamDetector.analyze(_:)` adds it to
    /// the running total for every match).
    let minScore: Int

    /// Creates a rule.
    ///
    /// - Parameters:
    ///   - name: Identifier for the rule.
    ///   - pattern: Optional regular expression source, compiled with
    ///     `.caseInsensitive`. An invalid pattern is silently dropped
    ///     (`regex` becomes `nil`), leaving only keyword matching.
    ///   - keywords: Substrings to look for; lowercased on storage.
    ///   - minScore: Score for the rule, defaulting to 1.
    init(name: String, pattern: String? = nil, keywords: [String] = [], minScore: Int = 1) {
        self.name = name
        self.minScore = minScore
        self.keywords = keywords.map { $0.lowercased() }
        // `try?` collapses both "no pattern" and "pattern did not compile" to nil.
        self.regex = pattern.flatMap { source in
            try? NSRegularExpression(pattern: source, options: [.caseInsensitive])
        }
    }

    /// Returns `true` when the regex matches anywhere in `text`, or when the
    /// lowercased `text` contains any of the stored keywords.
    ///
    /// - Parameter text: The text to test.
    func matches(_ text: String) -> Bool {
        // Regex first: a hit here short-circuits the keyword scan.
        if let compiled = regex,
           compiled.firstMatch(
               in: text,
               options: [],
               range: NSRange(text.startIndex..<text.endIndex, in: text)
           ) != nil {
            return true
        }

        guard !keywords.isEmpty else {
            return false
        }

        let haystack = text.lowercased()
        return keywords.contains(where: { keyword in haystack.contains(keyword) })
    }
}
|
|
|
|
/// Heuristic spam detector for post text and for a user's posting behavior.
///
/// Text analysis sums the `minScore` of every matching `SpamPattern`, adds
/// link-based penalties, and reports spam once the total reaches 2.
/// Behavior analysis looks for bursts of rapid posts and duplicated content.
struct SpamDetector: Sendable {
    /// Content rules evaluated by `analyze(_:)`, in declaration order.
    let patterns: [SpamPattern]
    /// Configured as 3. Not read by any method in this struct.
    let repetitionThreshold: Int
    /// Configured as seven days, in seconds. Not read by any method in this struct.
    let minAccountAgeSeconds: TimeInterval
    /// Two consecutive posts closer together than this many seconds count as rapid.
    let suspiciousPostFrequencySeconds: TimeInterval

    /// Matches `http://` or `https://` followed by one or more non-whitespace
    /// characters. Compiled once rather than on every `countLinks(in:)` call,
    /// since the pattern is constant.
    private static let linkRegex: NSRegularExpression? = try? NSRegularExpression(
        pattern: "https?://[^\\s]+",
        options: [.caseInsensitive]
    )

    /// Builds the detector with its built-in rule set and thresholds.
    init() {
        self.patterns = [
            SpamPattern(
                name: "crypto_scam",
                pattern: "(?:bitcoin|crypto|ethereum|btc|eth|nft|web3).*(?:invest|earn|profit|money|free|giveaway)",
                keywords: ["airdrop", "whitelist", "moon", "100x", "1000x"]
            ),
            SpamPattern(
                name: "promotion_spam",
                pattern: "(?:check out|visit|click|join|subscribe).*(?:my|our|this).*(?:channel|website|link|discord|telegram)",
                keywords: ["t.me/", "discord.gg/", "bit.ly/", "tinyurl"]
            ),
            SpamPattern(
                name: "adult_content",
                keywords: ["onlyfans", "18+", "xxx", "porn", "nude", "nsfw link"]
            ),
            SpamPattern(
                name: "repetitive_chars",
                pattern: "(.)\\1{10,}",
                minScore: 2
            ),
            SpamPattern(
                name: "excessive_caps",
                // `SpamPattern` always compiles with `.caseInsensitive`, which
                // would make `[A-Z]` match lowercase letters too and flag any
                // long lowercase text. The inline `(?-i)` flag turns case
                // sensitivity back on for this rule only.
                pattern: "(?-i)^[A-Z\\s!?]{50,}$"
            ),
            SpamPattern(
                name: "phishing",
                pattern: "(?:verify|confirm|update).*(?:account|password|login|credentials)",
                keywords: ["suspended", "blocked", "verify now", "act now", "urgent"]
            ),
            SpamPattern(
                name: "gambling",
                keywords: ["casino", "betting", "poker online", "slots", "jackpot", "win big"]
            ),
            SpamPattern(
                name: "malware_links",
                pattern: "(?:download|install|get).*(?:free|cracked|hack|keygen|patch)"
            )
        ]

        self.repetitionThreshold = 3
        self.minAccountAgeSeconds = 86400 * 7
        self.suspiciousPostFrequencySeconds = 60
    }

    /// Scores `text` against every rule and the link heuristics.
    ///
    /// - Parameter text: The post text to inspect.
    /// - Returns: The total score, the names of every rule or heuristic that
    ///   fired, and `isSpam == true` when the score is at least 2.
    func analyze(_ text: String) -> SpamAnalysis {
        var matchedPatterns: [String] = []
        var totalScore = 0

        for pattern in patterns where pattern.matches(text) {
            matchedPatterns.append(pattern.name)
            totalScore += pattern.minScore
        }

        // More than three links: penalty grows with the link count (4 links -> +2).
        let linkCount = countLinks(in: text)
        if linkCount > 3 {
            matchedPatterns.append("excessive_links")
            totalScore += linkCount - 2
        }

        // NOTE(review): the length includes the URL itself, so this branch only
        // fires when the entire text, link included, is under 10 characters.
        // Confirm whether the length was meant to exclude the links.
        if text.count < 10 && linkCount > 0 {
            matchedPatterns.append("short_with_link")
            totalScore += 2
        }

        return SpamAnalysis(
            isSpam: totalScore >= 2,
            score: totalScore,
            matchedPatterns: matchedPatterns
        )
    }

    /// Looks for suspicious posting behavior across a set of posts.
    ///
    /// Two signals are reported:
    /// - `rapid_posting:<n>` when at least three consecutive-post gaps (ordered
    ///   by `created`) are shorter than `suspiciousPostFrequencySeconds`.
    /// - `duplicate_content:<p>%` when there are at least three posts and more
    ///   than half of them duplicate another post after text normalization.
    ///
    /// - Parameter posts: Posts to inspect; fewer than two is never suspicious.
    /// - Returns: The reasons found, and `isSuspicious == true` when any exist.
    func analyzeUserBehavior(posts: [PostInfo]) -> BehaviorAnalysis {
        guard posts.count >= 2 else {
            return BehaviorAnalysis(isSuspicious: false, reasons: [])
        }

        var reasons: [String] = []

        // Pair each post with its chronological successor and count short gaps.
        let sortedPosts = posts.sorted { $0.created < $1.created }
        let rapidPosts = zip(sortedPosts, sortedPosts.dropFirst())
            .filter { earlier, later in
                later.created.timeIntervalSince(earlier.created) < suspiciousPostFrequencySeconds
            }
            .count

        if rapidPosts >= 3 {
            reasons.append("rapid_posting:\(rapidPosts)")
        }

        // Share of posts that are repeats of another post's normalized text.
        let uniqueTexts = Set(posts.map { normalizeText($0.text) })
        let duplicateRatio = 1.0 - (Double(uniqueTexts.count) / Double(posts.count))
        if duplicateRatio > 0.5 && posts.count >= 3 {
            reasons.append("duplicate_content:\(Int(duplicateRatio * 100))%")
        }

        return BehaviorAnalysis(
            isSuspicious: !reasons.isEmpty,
            reasons: reasons
        )
    }

    /// Counts `http://` / `https://` links in `text`; returns 0 if the link
    /// regex is unavailable.
    private func countLinks(in text: String) -> Int {
        guard let linkRegex = SpamDetector.linkRegex else {
            return 0
        }
        let range = NSRange(text.startIndex..., in: text)
        return linkRegex.numberOfMatches(in: text, options: [], range: range)
    }

    /// Lowercases `text` and collapses every run of whitespace or newlines
    /// into a single space, with no leading or trailing whitespace.
    ///
    /// Empty components are dropped before joining; without that, consecutive
    /// whitespace characters would survive as multiple spaces and posts
    /// differing only in spacing would not be recognized as duplicates.
    private func normalizeText(_ text: String) -> String {
        return text.lowercased()
            .components(separatedBy: .whitespacesAndNewlines)
            .filter { !$0.isEmpty }
            .joined(separator: " ")
    }
}
|
|
|
|
/// Result of scoring a single piece of text for spam.
struct SpamAnalysis: Sendable {
    /// Whether the text is considered spam.
    let isSpam: Bool
    /// Total score accumulated across everything that matched.
    let score: Int
    /// Names of the rules and heuristics that matched.
    let matchedPatterns: [String]
}
|
|
|
|
/// Result of inspecting a user's posting behavior.
struct BehaviorAnalysis: Sendable {
    /// Whether the behavior is considered suspicious.
    let isSuspicious: Bool
    /// Machine-readable reasons backing the verdict; empty when none were found.
    let reasons: [String]
}
|
|
|
|
/// Minimal description of a post, as consumed by behavior analysis.
struct PostInfo: Sendable {
    /// Identifier of the post.
    let id: Int
    /// Text content of the post.
    let text: String
    /// Creation timestamp of the post.
    let created: Date
    /// Identifier of the post's author.
    let authorId: Int
}
|