refactor: Replace java-string-similarity with pure Kotlin Levenshtein implementation

This commit is contained in:
Ahmad Ansori Palembani 2024-07-31 12:20:40 +07:00
parent 8cca5186dd
commit f64bdb2ca5
Signed by: null2264
GPG key ID: BA64F8B60AF3EFB6
5 changed files with 88 additions and 9 deletions

View file

@ -267,9 +267,6 @@ dependencies {
implementation(platform(kotlinx.coroutines.bom)) implementation(platform(kotlinx.coroutines.bom))
implementation(kotlinx.bundles.coroutines) implementation(kotlinx.bundles.coroutines)
// Text distance
implementation(libs.java.string.similarity)
// TLS 1.3 support for Android < 10 // TLS 1.3 support for Android < 10
implementation(libs.conscrypt) implementation(libs.conscrypt)

View file

@ -6,12 +6,12 @@ import eu.kanade.tachiyomi.domain.manga.models.Manga
import eu.kanade.tachiyomi.source.CatalogueSource import eu.kanade.tachiyomi.source.CatalogueSource
import eu.kanade.tachiyomi.source.model.SManga import eu.kanade.tachiyomi.source.model.SManga
import eu.kanade.tachiyomi.util.lang.toNormalized import eu.kanade.tachiyomi.util.lang.toNormalized
import info.debatty.java.stringsimilarity.NormalizedLevenshtein
import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job import kotlinx.coroutines.Job
import kotlinx.coroutines.supervisorScope import kotlinx.coroutines.supervisorScope
import uy.kohesive.injekt.injectLazy import uy.kohesive.injekt.injectLazy
import yokai.util.normalizedLevenshteinSimilarity
import kotlin.coroutines.CoroutineContext import kotlin.coroutines.CoroutineContext
class SmartSearchEngine( class SmartSearchEngine(
@ -22,8 +22,6 @@ class SmartSearchEngine(
private val db: DatabaseHelper by injectLazy() private val db: DatabaseHelper by injectLazy()
private val normalizedLevenshtein = NormalizedLevenshtein()
/*suspend fun smartSearch(source: CatalogueSource, title: String): SManga? { /*suspend fun smartSearch(source: CatalogueSource, title: String): SManga? {
val cleanedTitle = cleanSmartSearchTitle(title) val cleanedTitle = cleanSmartSearchTitle(title)
@ -40,7 +38,7 @@ class SmartSearchEngine(
searchResults.mangas.map { searchResults.mangas.map {
val cleanedMangaTitle = cleanSmartSearchTitle(it.title) val cleanedMangaTitle = cleanSmartSearchTitle(it.title)
val normalizedDistance = normalizedLevenshtein.similarity(cleanedTitle, cleanedMangaTitle) val normalizedDistance = normalizedLevenshteinSimilarity(cleanedTitle, cleanedMangaTitle)
SearchEntry(it, normalizedDistance) SearchEntry(it, normalizedDistance)
}.filter { (_, normalizedDistance) -> }.filter { (_, normalizedDistance) ->
normalizedDistance >= MIN_SMART_ELIGIBLE_THRESHOLD normalizedDistance >= MIN_SMART_ELIGIBLE_THRESHOLD
@ -68,7 +66,7 @@ class SmartSearchEngine(
} }
searchResults.mangas.map { searchResults.mangas.map {
val normalizedDistance = normalizedLevenshtein.similarity(titleNormalized, it.title.toNormalized()) val normalizedDistance = normalizedLevenshteinSimilarity(titleNormalized, it.title.toNormalized())
SearchEntry(it, normalizedDistance) SearchEntry(it, normalizedDistance)
}.filter { (_, normalizedDistance) -> }.filter { (_, normalizedDistance) ->
normalizedDistance >= MIN_NORMAL_ELIGIBLE_THRESHOLD normalizedDistance >= MIN_NORMAL_ELIGIBLE_THRESHOLD
@ -77,6 +75,7 @@ class SmartSearchEngine(
return eligibleManga.maxByOrNull { it.dist }?.manga return eligibleManga.maxByOrNull { it.dist }?.manga
} }
private fun removeTextInBrackets(text: String, readForward: Boolean): String { private fun removeTextInBrackets(text: String, readForward: Boolean): String {
val bracketPairs = listOf( val bracketPairs = listOf(
'(' to ')', '(' to ')',

View file

@ -0,0 +1,57 @@
package yokai.util
import kotlin.math.max
import kotlin.math.min
/**
 * Computes the Levenshtein (edit) distance between [lhs] and [rhs]: the minimum number of
 * single-character insertions, deletions and substitutions needed to turn one into the other.
 *
 * Modified version of ademar111190's Levenshtein implementation.
 *
 * REF: https://gist.github.com/ademar111190/34d3de41308389a0d0d8
 *
 * @param lhs first character sequence
 * @param rhs second character sequence
 * @return the edit distance; `0` when both sequences hold the same characters
 */
fun levenshteinDistance(lhs: CharSequence, rhs: CharSequence): Int {
    // Fast paths. `==` may not short-circuit for two different CharSequence implementations
    // with equal content, but the general algorithm below still yields 0 in that case.
    if (lhs == rhs) return 0
    if (lhs.isEmpty()) return rhs.length
    if (rhs.isEmpty()) return lhs.length

    val rowSize = lhs.length + 1

    // Only two rows of the dynamic-programming table are kept at a time.
    // IntArray stores primitive ints, avoiding the boxing that Array<Int> incurs.
    var previousRow = IntArray(rowSize) { it }
    var currentRow = IntArray(rowSize)

    for (i in 1..rhs.length) {
        currentRow[0] = i
        val rhsChar = rhs[i - 1]

        for (j in 1..lhs.length) {
            val mismatch = if (lhs[j - 1] == rhsChar) 0 else 1
            val costReplace = previousRow[j - 1] + mismatch
            val costInsert = previousRow[j] + 1
            val costDelete = currentRow[j - 1] + 1
            currentRow[j] = min(min(costInsert, costDelete), costReplace)
        }

        // Reuse the old row as scratch space for the next iteration.
        val swap = previousRow
        previousRow = currentRow
        currentRow = swap
    }

    // After the final swap the completed row lives in previousRow.
    return previousRow[lhs.length]
}
/**
 * Computes a similarity score for [lhs] and [rhs] as one minus the Levenshtein distance
 * divided by the length of the longer input.
 *
 * @param lhs first character sequence
 * @param rhs second character sequence
 * @return `1.0 - levenshteinDistance(lhs, rhs) / max(lhs.length, rhs.length)`;
 * `1.0` when both inputs are empty
 */
fun normalizedLevenshteinSimilarity(lhs: CharSequence, rhs: CharSequence): Double {
    val maxLength = max(lhs.length, rhs.length)

    // Two empty sequences have a normalized distance of 0; returning early also avoids
    // dividing by zero.
    if (maxLength == 0) return 1.0

    val normalizedDistance = levenshteinDistance(lhs, rhs) / maxLength.toDouble()
    return 1.0 - normalizedDistance
}

View file

@ -0,0 +1,27 @@
package yokai.util
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
// REF: https://gist.github.com/ademar111190/34d3de41308389a0d0d8?permalink_comment_id=4675859#gistcomment-4675859
/**
 * Unit tests for [levenshteinDistance].
 */
class LevenshteinTest {

    /** Runs [levenshteinDistance] against a table of input pairs and their expected distances. */
    @Test
    fun `Distance Test`() {
        val cases = listOf(
            Triple("", "", 0),
            Triple("1", "1", 0),
            Triple("1", "2", 1),
            Triple("12", "12", 0),
            Triple("123", "12", 1),
            Triple("1234", "1", 3),
            Triple("1234", "1233", 1),
            Triple("", "12345", 5),
            Triple("kitten", "mittens", 2),
            Triple("canada", "canad", 1),
            Triple("canad", "canada", 1),
        )

        for ((first, second, expected) in cases) {
            testDistance(first, second, expected)
        }
    }

    /** Asserts that the distance between [a] and [b] equals [expectedDistance]. */
    private fun testDistance(a: String, b: String, expectedDistance: Int) {
        val actual = levenshteinDistance(a, b)
        assertEquals(expectedDistance, actual, "Distance did not match for `$a` and `$b`")
    }
}

View file

@ -55,7 +55,6 @@ mpandroidchart = { module = "com.github.PhilJay:MPAndroidChart", version = "v3.1
nucleus-support-v7 = { module = "info.android15.nucleus:nucleus-support-v7", version.ref = "nucleus" } nucleus-support-v7 = { module = "info.android15.nucleus:nucleus-support-v7", version.ref = "nucleus" }
nucleus = { module = "info.android15.nucleus:nucleus", version.ref = "nucleus" } nucleus = { module = "info.android15.nucleus:nucleus", version.ref = "nucleus" }
java-nat-sort = { module = "com.github.gpanther:java-nat-sort", version = "natural-comparator-1.1" } java-nat-sort = { module = "com.github.gpanther:java-nat-sort", version = "natural-comparator-1.1" }
java-string-similarity = { module = "info.debatty:java-string-similarity", version = "2.0.0" }
jsoup = { module = "org.jsoup:jsoup", version = "1.17.1" } jsoup = { module = "org.jsoup:jsoup", version = "1.17.1" }
junit-engine = { module = "org.junit.jupiter:junit-jupiter-engine", version.ref = "junit" } junit-engine = { module = "org.junit.jupiter:junit-jupiter-engine", version.ref = "junit" }
junit-api = { module = "org.junit.jupiter:junit-jupiter-api", version.ref = "junit" } junit-api = { module = "org.junit.jupiter:junit-jupiter-api", version.ref = "junit" }