mirror of
https://github.com/null2264/yokai.git
synced 2025-06-21 10:44:42 +00:00
refactor: Replace java-string-similarity with pure Kotlin Levenshtein implementation
This commit is contained in:
parent
8cca5186dd
commit
f64bdb2ca5
5 changed files with 88 additions and 9 deletions
|
@ -267,9 +267,6 @@ dependencies {
|
|||
implementation(platform(kotlinx.coroutines.bom))
|
||||
implementation(kotlinx.bundles.coroutines)
|
||||
|
||||
// Text distance
|
||||
implementation(libs.java.string.similarity)
|
||||
|
||||
// TLS 1.3 support for Android < 10
|
||||
implementation(libs.conscrypt)
|
||||
|
||||
|
|
|
@ -6,12 +6,12 @@ import eu.kanade.tachiyomi.domain.manga.models.Manga
|
|||
import eu.kanade.tachiyomi.source.CatalogueSource
|
||||
import eu.kanade.tachiyomi.source.model.SManga
|
||||
import eu.kanade.tachiyomi.util.lang.toNormalized
|
||||
import info.debatty.java.stringsimilarity.NormalizedLevenshtein
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.Job
|
||||
import kotlinx.coroutines.supervisorScope
|
||||
import uy.kohesive.injekt.injectLazy
|
||||
import yokai.util.normalizedLevenshteinSimilarity
|
||||
import kotlin.coroutines.CoroutineContext
|
||||
|
||||
class SmartSearchEngine(
|
||||
|
@ -22,8 +22,6 @@ class SmartSearchEngine(
|
|||
|
||||
private val db: DatabaseHelper by injectLazy()
|
||||
|
||||
private val normalizedLevenshtein = NormalizedLevenshtein()
|
||||
|
||||
/*suspend fun smartSearch(source: CatalogueSource, title: String): SManga? {
|
||||
val cleanedTitle = cleanSmartSearchTitle(title)
|
||||
|
||||
|
@ -40,7 +38,7 @@ class SmartSearchEngine(
|
|||
|
||||
searchResults.mangas.map {
|
||||
val cleanedMangaTitle = cleanSmartSearchTitle(it.title)
|
||||
val normalizedDistance = normalizedLevenshtein.similarity(cleanedTitle, cleanedMangaTitle)
|
||||
val normalizedDistance = normalizedLevenshteinSimilarity(cleanedTitle, cleanedMangaTitle)
|
||||
SearchEntry(it, normalizedDistance)
|
||||
}.filter { (_, normalizedDistance) ->
|
||||
normalizedDistance >= MIN_SMART_ELIGIBLE_THRESHOLD
|
||||
|
@ -68,7 +66,7 @@ class SmartSearchEngine(
|
|||
}
|
||||
|
||||
searchResults.mangas.map {
|
||||
val normalizedDistance = normalizedLevenshtein.similarity(titleNormalized, it.title.toNormalized())
|
||||
val normalizedDistance = normalizedLevenshteinSimilarity(titleNormalized, it.title.toNormalized())
|
||||
SearchEntry(it, normalizedDistance)
|
||||
}.filter { (_, normalizedDistance) ->
|
||||
normalizedDistance >= MIN_NORMAL_ELIGIBLE_THRESHOLD
|
||||
|
@ -77,6 +75,7 @@ class SmartSearchEngine(
|
|||
|
||||
return eligibleManga.maxByOrNull { it.dist }?.manga
|
||||
}
|
||||
|
||||
private fun removeTextInBrackets(text: String, readForward: Boolean): String {
|
||||
val bracketPairs = listOf(
|
||||
'(' to ')',
|
||||
|
|
57
app/src/main/java/yokai/util/Levenshtein.kt
Normal file
57
app/src/main/java/yokai/util/Levenshtein.kt
Normal file
|
@ -0,0 +1,57 @@
|
|||
package yokai.util
|
||||
|
||||
import kotlin.math.max
|
||||
import kotlin.math.min
|
||||
|
||||
/**
 * Computes the Levenshtein (edit) distance between [lhs] and [rhs]: the minimum number of
 * single-character insertions, deletions and substitutions needed to turn one into the other.
 *
 * Modified version of ademar111190's Levenshtein implementation
 *
 * REF: https://gist.github.com/ademar111190/34d3de41308389a0d0d8
 *
 * @param lhs first sequence to compare
 * @param rhs second sequence to compare
 * @return the edit distance; `0` when both sequences have the same content
 */
fun levenshteinDistance(lhs: CharSequence, rhs: CharSequence): Int {
    // Cheap exits; sequences that are equal in content but not via equals() still
    // fall through to the table below and end up with 0.
    if (lhs == rhs) return 0
    if (lhs.isEmpty()) return rhs.length
    if (rhs.isEmpty()) return lhs.length

    val rowSize = lhs.length + 1

    // Only two rows of the dynamic-programming table are kept alive at a time.
    // IntArray is used instead of Array<Int> so the cells are not boxed.
    var previousRow = IntArray(rowSize) { it }
    var currentRow = IntArray(rowSize)

    for (i in 1..rhs.length) {
        // Turning an empty prefix of lhs into the first i chars of rhs takes i insertions
        currentRow[0] = i

        for (j in 1..lhs.length) {
            val mismatch = if (lhs[j - 1] == rhs[i - 1]) 0 else 1

            val costReplace = previousRow[j - 1] + mismatch
            val costInsert = previousRow[j] + 1
            val costDelete = currentRow[j - 1] + 1

            currentRow[j] = min(min(costInsert, costDelete), costReplace)
        }

        // Recycle the old row as the scratch buffer for the next iteration
        val finishedRow = currentRow
        currentRow = previousRow
        previousRow = finishedRow
    }

    return previousRow[rowSize - 1]
}
|
||||
|
||||
/**
 * Computes a similarity score for [lhs] and [rhs] derived from their Levenshtein distance.
 *
 * The edit distance is divided by the length of the longer sequence and subtracted from `1.0`,
 * so `1.0` means the sequences are identical and `0.0` means every position had to be edited.
 *
 * @param lhs first sequence to compare
 * @param rhs second sequence to compare
 * @return similarity in the range `0.0..1.0`; `1.0` when both sequences are empty
 */
fun normalizedLevenshteinSimilarity(lhs: CharSequence, rhs: CharSequence): Double {
    val maxLength = max(lhs.length, rhs.length)

    // Two empty sequences are identical; returning early also avoids dividing by zero
    if (maxLength == 0) return 1.0

    val normalizedDistance = levenshteinDistance(lhs, rhs) / maxLength.toDouble()
    return 1.0 - normalizedDistance
}
|
27
app/src/test/java/yokai/util/LevenshteinTest.kt
Normal file
27
app/src/test/java/yokai/util/LevenshteinTest.kt
Normal file
|
@ -0,0 +1,27 @@
|
|||
package yokai.util
|
||||
|
||||
import org.junit.jupiter.api.Assertions.assertEquals
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
// REF: https://gist.github.com/ademar111190/34d3de41308389a0d0d8?permalink_comment_id=4675859#gistcomment-4675859
|
||||
class LevenshteinTest {
    /**
     * Table-driven check of [levenshteinDistance]: every entry is
     * (first input, second input, expected edit distance).
     */
    @Test
    fun `Distance Test`() {
        val cases = listOf(
            Triple("", "", 0),
            Triple("1", "1", 0),
            Triple("1", "2", 1),
            Triple("12", "12", 0),
            Triple("123", "12", 1),
            Triple("1234", "1", 3),
            Triple("1234", "1233", 1),
            Triple("", "12345", 5),
            Triple("kitten", "mittens", 2),
            Triple("canada", "canad", 1),
            Triple("canad", "canada", 1),
        )

        // Cases run in declaration order; the first mismatch fails the test
        for ((a, b, expectedDistance) in cases) {
            val actualDistance = levenshteinDistance(a, b)
            assertEquals(expectedDistance, actualDistance, "Distance did not match for `$a` and `$b`")
        }
    }
}
|
|
@ -55,7 +55,6 @@ mpandroidchart = { module = "com.github.PhilJay:MPAndroidChart", version = "v3.1
|
|||
nucleus-support-v7 = { module = "info.android15.nucleus:nucleus-support-v7", version.ref = "nucleus" }
|
||||
nucleus = { module = "info.android15.nucleus:nucleus", version.ref = "nucleus" }
|
||||
java-nat-sort = { module = "com.github.gpanther:java-nat-sort", version = "natural-comparator-1.1" }
|
||||
java-string-similarity = { module = "info.debatty:java-string-similarity", version = "2.0.0" }
|
||||
jsoup = { module = "org.jsoup:jsoup", version = "1.17.1" }
|
||||
junit-engine = { module = "org.junit.jupiter:junit-jupiter-engine", version.ref = "junit" }
|
||||
junit-api = { module = "org.junit.jupiter:junit-jupiter-api", version.ref = "junit" }
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue