refactor: Replace java-string-similarity with pure Kotlin Levenshtein implementation

This commit is contained in:
Ahmad Ansori Palembani 2024-07-31 12:20:40 +07:00
parent 8cca5186dd
commit f64bdb2ca5
Signed by: null2264
GPG key ID: BA64F8B60AF3EFB6
5 changed files with 88 additions and 9 deletions

View file

@ -267,9 +267,6 @@ dependencies {
implementation(platform(kotlinx.coroutines.bom))
implementation(kotlinx.bundles.coroutines)
// Text distance
implementation(libs.java.string.similarity)
// TLS 1.3 support for Android < 10
implementation(libs.conscrypt)

View file

@ -6,12 +6,12 @@ import eu.kanade.tachiyomi.domain.manga.models.Manga
import eu.kanade.tachiyomi.source.CatalogueSource
import eu.kanade.tachiyomi.source.model.SManga
import eu.kanade.tachiyomi.util.lang.toNormalized
import info.debatty.java.stringsimilarity.NormalizedLevenshtein
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
import kotlinx.coroutines.supervisorScope
import uy.kohesive.injekt.injectLazy
import yokai.util.normalizedLevenshteinSimilarity
import kotlin.coroutines.CoroutineContext
class SmartSearchEngine(
@ -22,8 +22,6 @@ class SmartSearchEngine(
private val db: DatabaseHelper by injectLazy()
private val normalizedLevenshtein = NormalizedLevenshtein()
/*suspend fun smartSearch(source: CatalogueSource, title: String): SManga? {
val cleanedTitle = cleanSmartSearchTitle(title)
@ -40,7 +38,7 @@ class SmartSearchEngine(
searchResults.mangas.map {
val cleanedMangaTitle = cleanSmartSearchTitle(it.title)
val normalizedDistance = normalizedLevenshtein.similarity(cleanedTitle, cleanedMangaTitle)
val normalizedDistance = normalizedLevenshteinSimilarity(cleanedTitle, cleanedMangaTitle)
SearchEntry(it, normalizedDistance)
}.filter { (_, normalizedDistance) ->
normalizedDistance >= MIN_SMART_ELIGIBLE_THRESHOLD
@ -68,7 +66,7 @@ class SmartSearchEngine(
}
searchResults.mangas.map {
val normalizedDistance = normalizedLevenshtein.similarity(titleNormalized, it.title.toNormalized())
val normalizedDistance = normalizedLevenshteinSimilarity(titleNormalized, it.title.toNormalized())
SearchEntry(it, normalizedDistance)
}.filter { (_, normalizedDistance) ->
normalizedDistance >= MIN_NORMAL_ELIGIBLE_THRESHOLD
@ -77,6 +75,7 @@ class SmartSearchEngine(
return eligibleManga.maxByOrNull { it.dist }?.manga
}
private fun removeTextInBrackets(text: String, readForward: Boolean): String {
val bracketPairs = listOf(
'(' to ')',

View file

@ -0,0 +1,57 @@
package yokai.util
import kotlin.math.max
import kotlin.math.min
/**
 * Computes the Levenshtein (edit) distance between [lhs] and [rhs]: the minimum number of
 * single-character insertions, deletions and substitutions needed to turn one into the other.
 *
 * Modified version of ademar111190's Levenshtein implementation
 *
 * REF: https://gist.github.com/ademar111190/34d3de41308389a0d0d8
 *
 * Only two rows of the dynamic-programming table are kept at a time, each sized
 * `lhs.length + 1`.
 *
 * @param lhs the first character sequence
 * @param rhs the second character sequence
 * @return the edit distance, a value between `0` and `max(lhs.length, rhs.length)`
 */
fun levenshteinDistance(lhs: CharSequence, rhs: CharSequence): Int {
    // Fast paths: equal inputs need no edits, and an empty side costs one edit per character
    // of the other side. Inputs whose `equals` is identity-based simply fall through to the
    // full computation below.
    if (lhs == rhs) return 0
    if (lhs.isEmpty()) return rhs.length
    if (rhs.isEmpty()) return lhs.length

    val rowSize = lhs.length + 1

    // Primitive arrays keep the rows unboxed. `previousRow` starts as the distance from the
    // empty prefix of rhs to every prefix of lhs.
    var previousRow = IntArray(rowSize) { it }
    var currentRow = IntArray(rowSize)

    for (i in 1..rhs.length) {
        // Distance from the first i characters of rhs to the empty prefix of lhs.
        currentRow[0] = i

        for (j in 1..lhs.length) {
            val substitutionCost = if (lhs[j - 1] == rhs[i - 1]) 0 else 1
            val viaDiagonal = previousRow[j - 1] + substitutionCost
            val viaPreviousRow = previousRow[j] + 1
            val viaCurrentRow = currentRow[j - 1] + 1
            currentRow[j] = min(min(viaPreviousRow, viaCurrentRow), viaDiagonal)
        }

        // Reuse the two buffers instead of allocating a new row per iteration.
        val swap = previousRow
        previousRow = currentRow
        currentRow = swap
    }

    // After the final swap the last computed row lives in `previousRow`.
    return previousRow[rowSize - 1]
}
/**
 * Computes the normalized Levenshtein similarity of [lhs] and [rhs].
 *
 * The edit distance from [levenshteinDistance] is divided by the length of the longer input
 * and subtracted from `1.0`, so identical inputs score `1.0` and the score decreases as more
 * edits are required.
 *
 * @param lhs the first character sequence
 * @param rhs the second character sequence
 * @return `1.0 - distance / max(lhs.length, rhs.length)`; `1.0` when both inputs are empty
 */
fun normalizedLevenshteinSimilarity(lhs: CharSequence, rhs: CharSequence): Double {
    val maxLength = max(lhs.length, rhs.length)

    // Two empty inputs are treated as identical; this also avoids dividing by zero.
    if (maxLength == 0) return 1.0

    val normalizedDistance = levenshteinDistance(lhs, rhs) / maxLength.toDouble()
    return 1.0 - normalizedDistance
}

View file

@ -0,0 +1,27 @@
package yokai.util
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
// REF: https://gist.github.com/ademar111190/34d3de41308389a0d0d8?permalink_comment_id=4675859#gistcomment-4675859
class LevenshteinTest {
    /**
     * Runs [levenshteinDistance] over a table of input pairs with known distances.
     * Cases are checked in order; the first mismatch fails the test.
     */
    @Test
    fun `Distance Test`() {
        val cases = listOf(
            Triple("", "", 0),
            Triple("1", "1", 0),
            Triple("1", "2", 1),
            Triple("12", "12", 0),
            Triple("123", "12", 1),
            Triple("1234", "1", 3),
            Triple("1234", "1233", 1),
            Triple("", "12345", 5),
            Triple("kitten", "mittens", 2),
            Triple("canada", "canad", 1),
            Triple("canad", "canada", 1),
        )

        for ((first, second, expected) in cases) {
            assertDistance(first, second, expected)
        }
    }

    /** Asserts that the distance between [a] and [b] equals [expectedDistance]. */
    private fun assertDistance(a: String, b: String, expectedDistance: Int) {
        val actual = levenshteinDistance(a, b)
        assertEquals(expectedDistance, actual, "Distance did not match for `$a` and `$b`")
    }
}

View file

@ -55,7 +55,6 @@ mpandroidchart = { module = "com.github.PhilJay:MPAndroidChart", version = "v3.1
nucleus-support-v7 = { module = "info.android15.nucleus:nucleus-support-v7", version.ref = "nucleus" }
nucleus = { module = "info.android15.nucleus:nucleus", version.ref = "nucleus" }
java-nat-sort = { module = "com.github.gpanther:java-nat-sort", version = "natural-comparator-1.1" }
java-string-similarity = { module = "info.debatty:java-string-similarity", version = "2.0.0" }
jsoup = { module = "org.jsoup:jsoup", version = "1.17.1" }
junit-engine = { module = "org.junit.jupiter:junit-jupiter-engine", version.ref = "junit" }
junit-api = { module = "org.junit.jupiter:junit-jupiter-api", version.ref = "junit" }