Вычисление дистации Левенштайна
- package levenshtein
- // reference: https://en.wikipedia.org/wiki/Levenshtein_distance
- func Distance(a, b string) int {
- lol, kek := make([]int, len(b)+1), make([]int, len(b)+1)
- for k := range lol {
- lol[k] = k
- }
- for i := 0; i < len(a); i++ {
- kek[0] = i + 1
- for m := 0; m < len(b); m++ {
- delCost, insCost := lol[m+1]+1, kek[m]+1
- subsCost := lol[m]
- if a[i] != b[m] {
- subsCost = lol[m] + 1
- }
- kek[m+1] = min(delCost, insCost, subsCost)
- }
- lol, kek = kek, lol
- }
- return lol[len(b)]
- }
- func General(a, b string, insCost, subsCost, delCost int) int {
- d := make([][]int, len(a)+1)
- for k := range d {
- d[k] = make([]int, len(b)+1)
- }
- for i := 1; i <= len(a); i++ {
- d[i][0] = i * delCost
- }
- for i := 1; i <= len(b); i++ {
- d[0][i] = i * insCost
- }
- for i := 1; i <= len(a); i++ {
- for m := 1; m <= len(b); m++ {
- currDelCost, currInsCost := d[i-1][m]+delCost, d[i][m-1]+insCost
- currSubsCost := d[i-1][m-1]
- if a[i-1] != b[m-1] {
- currSubsCost += subsCost
- }
- d[i][m] = min(currDelCost, currInsCost, currSubsCost)
- }
- }
- return d[len(a)][len(b)]
- }
- func min(a, b, c int) int {
- if a < b && a < c {
- return a
- }
- if b < a && b < c {
- return b
- }
- return c
- }
- package levenshtein
- import (
- "math/rand"
- "strings"
- "testing"
- "time"
- )
- // -------------------------------------------------------
- // tests
- // -------------------------------------------------------
- func TestDistance(t *testing.T) {
- a, b := "sitting", "kitten"
- if got, want := Distance(a, b), 3; got != want {
- t.Errorf("Distance(%#v, %#v) = %#v; want %#v", a, b, got, want)
- }
- if got, want := Distance(b, a), 3; got != want {
- t.Errorf("Distance(%#v, %#v) = %#v; want %#v", b, a, got, want)
- }
- a, b = "Sunday", "Saturday"
- if got, want := Distance(a, b), 3; got != want {
- t.Errorf("Distance(%#v, %#v) = %#v; want %#v", a, b, got, want)
- }
- if got, want := Distance(b, a), 3; got != want {
- t.Errorf("Distance(%#v, %#v) = %#v; want %#v", b, a, got, want)
- }
- }
- func TestGeneral(t *testing.T) {
- insCost, subsCost, delCost := 2, 4, 16
- a, b := "sitting", "kitten"
- if got, want := General(a, b, insCost, subsCost, delCost), 24; got != want {
- t.Errorf(
- "General(%#v, %#v, %#v, %#v, %#v) = %#v; want %#v",
- a, b, insCost, subsCost, delCost, got, want,
- )
- }
- if got, want := General(b, a, insCost, subsCost, delCost), 10; got != want {
- t.Errorf(
- "General(%#v, %#v, %#v, %#v, %#v) = %#v; want %#v",
- a, b, insCost, subsCost, delCost, got, want,
- )
- }
- a, b = "Sunday", "Saturday"
- if got, want := General(a, b, insCost, subsCost, delCost), 8; got != want {
- t.Errorf(
- "General(%#v, %#v, %#v, %#v, %#v) = %#v; want %#v",
- a, b, insCost, subsCost, delCost, got, want,
- )
- }
- if got, want := General(b, a, insCost, subsCost, delCost), 36; got != want {
- t.Errorf(
- "General(%#v, %#v, %#v, %#v, %#v) = %#v; want %#v",
- a, b, insCost, subsCost, delCost, got, want,
- )
- }
- }
- // -------------------------------------------------------
- // benchmarks
- // -------------------------------------------------------
- func BenchmarkDistance(b *testing.B) {
- lol, kek := make([]string, 200), make([]string, 400)
- r := rand.New(rand.NewSource(time.Now().UnixNano()))
- for k := range lol {
- lol[k] = genRandomString(r)
- }
- for k := range kek {
- kek[k] = genRandomString(r)
- }
- b.ResetTimer()
- for _, v1 := range lol {
- for _, v2 := range kek {
- Distance(v1, v2)
- }
- }
- }
- func BenchmarkGeneral(b *testing.B) {
- lol, kek := make([]string, 200), make([]string, 400)
- r := rand.New(rand.NewSource(time.Now().UnixNano()))
- for k := range lol {
- lol[k] = genRandomString(r)
- }
- for k := range kek {
- kek[k] = genRandomString(r)
- }
- b.ResetTimer()
- for _, v1 := range lol {
- for _, v2 := range kek {
- General(v1, v2, 2, 4, 16)
- }
- }
- }
- // -------------------------------------------------------
- // utils
- // -------------------------------------------------------
- func genRandomString(r *rand.Rand) string {
- /* settings */
- var (
- parts = 3
- charsPerPart = 6
- delimiter = "-"
- )
- chars := "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYX0123456789"
- o := make([]string, parts)
- for k := range o {
- for i := 0; i < charsPerPart; i++ {
- o[k] += string(chars[r.Intn(len(chars))])
- }
- }
- return strings.Join(o, delimiter)
- }
Алгоритм вычисляет дистанцию Левенштайна между двумя строками. Это можно использовать для исправления опечаток в вводе пользователя.
Пример использования:
Функция General позволяет указать стоимость отдельно для вставки, замены и удаления. Но эта функция работает медленнее Distance. Сравнение производительности:
Пример использования:
- a, b := "hmail.com", "hotmail.com"
- if levenshtein.Distance(a, b) <= 2 {
- fmt.Printf("did you mean %s?\n", b)
- }
Функция General позволяет указать стоимость отдельно для вставки, замены и удаления. Но эта функция работает медленнее Distance. Сравнение производительности:
- $ go test -bench=.
- goos: linux
- goarch: amd64
- pkg: go_levenshtein_example/levenshtein
- BenchmarkDistance-2 2000000000 0.16 ns/op
- BenchmarkGeneral-2 2000000000 0.36 ns/op
- PASS
- ok go_levenshtein_example/levenshtein 25.023s