levenshtein/levenshtein.go

107 lines
2.1 KiB
Go
Raw Permalink Normal View History

2023-10-09 19:13:11 +02:00
package levenshtein
type BestFittingChoice struct {
Ratio float64
Key string
Value string
}
// GetBestFittingChoice returns the best fitting key value pair in choices for s as the potential key.
func GetBestFittingChoice(s string, choices map[string]string) BestFittingChoice {
var result BestFittingChoice = BestFittingChoice{
Ratio: 0.0,
Key: "",
Value: "",
}
value, perfectMatch := choices[s]
if perfectMatch {
result.Ratio = 1.0
result.Key = s
result.Value = value
return result
}
for k, v := range choices {
r := calculateRatioForStrings(s, k)
if r > result.Ratio {
result.Ratio = r
result.Key = k
result.Value = v
}
}
return result
}
func buildMatrixForStrings(source []rune, target []rune) [][]int64 {
var costsOfDeletion int64 = 1
var costsOfInsertion int64 = 1
var costsOfSubstitutions int64 = 2
var rows int64 = int64(len(source) + 1)
var columns int64 = int64(len(target) + 1)
matrix := make([][]int64, rows)
for i := int64(0); i < rows; i++ {
matrix[i] = make([]int64, columns)
matrix[i][0] = i * costsOfDeletion
}
for j := int64(1); j < columns; j++ {
matrix[0][j] = j * costsOfInsertion
}
for i := int64(1); i < rows; i++ {
for j := int64(1); j < columns; j++ {
delCost := matrix[i-1][j] + costsOfDeletion
matchSubCost := matrix[i-1][j-1]
if source[i-1] != target[j-1] {
matchSubCost += costsOfSubstitutions
}
insCost := matrix[i][j-1] + costsOfInsertion
matrix[i][j] = min(
delCost,
min(
matchSubCost,
insCost,
),
)
}
}
return matrix
}
func calculateRatioForStrings(s1 string, s2 string) float64 {
return calculateRatioOfMatrix(
buildMatrixForStrings(
[]rune(s1),
[]rune(s2),
),
)
}
func calculateRatioOfMatrix(matrix [][]int64) float64 {
lenSource := int64(len(matrix) - 1)
lenTarget := int64(len(matrix[0]) - 1)
sum := lenSource + lenTarget
if sum == 0 {
return 0
}
return float64(sum-getDistanceOfMatrix(matrix)) / float64(sum)
}
func getDistanceOfMatrix(matrix [][]int64) int64 {
return matrix[len(matrix)-1][len(matrix[0])-1]
}
func min(a int64, b int64) int64 {
if b < a {
return b
}
return a
}