Commit b80a188b by David Malcolm Committed by David Malcolm

spellcheck: support transpositions aka Damerau-Levenshtein (PR other/69968)

gcc/fortran/ChangeLog:
	PR other/69968
	* misc.c (gfc_closest_fuzzy_match): Update for renaming of
	levenshtein_distance to get_edit_distance.

gcc/ChangeLog:
	PR other/69968
	* spellcheck-tree.c (levenshtein_distance): Rename to...
	(get_edit_distance): ...this, and update for underlying renaming.
	* spellcheck-tree.h (levenshtein_distance): Rename to...
	(get_edit_distance): ...this.
	* spellcheck.c (levenshtein_distance): Rename to...
	(get_edit_distance): ...this.  Convert from Levenshtein distance
	to Damerau-Levenshtein distance by supporting transpositions of
	adjacent characters.  Rename "v1" to "v_next" and "v0" to
	"v_one_ago".
	(selftest::levenshtein_distance_unit_test_oneway): Rename to...
	(selftest::test_edit_distance_unit_test_oneway): ...this, and
	update for underlying renaming.
	(selftest::levenshtein_distance_unit_test): Rename to...
	(selftest::test_get_edit_distance_unit): ...this, and update for
	underlying renaming.
	(selftest::test_find_closest_string): Add example from PR 69968
	where transposition helps.
	(selftest::test_metric_conditions): Update for renaming.
	(selftest::test_metric_conditions): Likewise.
	(selftest::spellcheck_c_tests): Likewise.
	* spellcheck.h (levenshtein_distance): Rename both overloads to...
	(get_edit_distance): ...this.
	(best_match::consider): Update for renaming.

gcc/testsuite/ChangeLog:
	PR other/69968
	* gcc.dg/spellcheck-transposition.c: New test.

From-SVN: r261521
parent e3329a78
2018-06-12 David Malcolm <dmalcolm@redhat.com>
PR other/69968
* spellcheck-tree.c (levenshtein_distance): Rename to...
(get_edit_distance): ...this, and update for underlying renaming.
* spellcheck-tree.h (levenshtein_distance): Rename to...
(get_edit_distance): ...this.
* spellcheck.c (levenshtein_distance): Rename to...
(get_edit_distance): ...this. Convert from Levenshtein distance
to Damerau-Levenshtein distance by supporting transpositions of
adjacent characters. Rename "v1" to "v_next" and "v0" to
"v_one_ago".
(selftest::levenshtein_distance_unit_test_oneway): Rename to...
(selftest::test_edit_distance_unit_test_oneway): ...this, and
update for underlying renaming.
(selftest::levenshtein_distance_unit_test): Rename to...
(selftest::test_get_edit_distance_unit): ...this, and update for
underlying renaming.
(selftest::test_find_closest_string): Add example from PR 69968
where transposition helps.
(selftest::test_metric_conditions): Update for renaming.
(selftest::test_metric_conditions): Likewise.
(selftest::spellcheck_c_tests): Likewise.
* spellcheck.h (levenshtein_distance): Rename both overloads to...
(get_edit_distance): ...this.
(best_match::consider): Update for renaming.
2018-06-12 Martin Sebor <msebor@redhat.com> 2018-06-12 Martin Sebor <msebor@redhat.com>
PR tree-optimization/85259 PR tree-optimization/85259
......
2018-06-12 David Malcolm <dmalcolm@redhat.com>
PR other/69968
* misc.c (gfc_closest_fuzzy_match): Update for renaming of
levenshtein_distance to get_edit_distance.
2018-06-12 Steven G. Kargl <kargl@gcc.gnu.org> 2018-06-12 Steven G. Kargl <kargl@gcc.gnu.org>
PR fortran/44491 PR fortran/44491
......
...@@ -286,7 +286,7 @@ get_c_kind(const char *c_kind_name, CInteropKind_t kinds_table[]) ...@@ -286,7 +286,7 @@ get_c_kind(const char *c_kind_name, CInteropKind_t kinds_table[])
/* For a given name TYPO, determine the best candidate from CANDIDATES /* For a given name TYPO, determine the best candidate from CANDIDATES
perusing Levenshtein distance. Frees CANDIDATES before returning. */ using get_edit_distance. Frees CANDIDATES before returning. */
const char * const char *
gfc_closest_fuzzy_match (const char *typo, char **candidates) gfc_closest_fuzzy_match (const char *typo, char **candidates)
...@@ -299,7 +299,7 @@ gfc_closest_fuzzy_match (const char *typo, char **candidates) ...@@ -299,7 +299,7 @@ gfc_closest_fuzzy_match (const char *typo, char **candidates)
while (cand && *cand) while (cand && *cand)
{ {
edit_distance_t dist = levenshtein_distance (typo, tl, *cand, edit_distance_t dist = get_edit_distance (typo, tl, *cand,
strlen (*cand)); strlen (*cand));
if (dist < best_distance) if (dist < best_distance)
{ {
......
...@@ -27,18 +27,18 @@ along with GCC; see the file COPYING3. If not see ...@@ -27,18 +27,18 @@ along with GCC; see the file COPYING3. If not see
#include "selftest.h" #include "selftest.h"
#include "stringpool.h" #include "stringpool.h"
/* Calculate Levenshtein distance between two identifiers. */ /* Calculate edit distance between two identifiers. */
edit_distance_t edit_distance_t
levenshtein_distance (tree ident_s, tree ident_t) get_edit_distance (tree ident_s, tree ident_t)
{ {
gcc_assert (TREE_CODE (ident_s) == IDENTIFIER_NODE); gcc_assert (TREE_CODE (ident_s) == IDENTIFIER_NODE);
gcc_assert (TREE_CODE (ident_t) == IDENTIFIER_NODE); gcc_assert (TREE_CODE (ident_t) == IDENTIFIER_NODE);
return levenshtein_distance (IDENTIFIER_POINTER (ident_s), return get_edit_distance (IDENTIFIER_POINTER (ident_s),
IDENTIFIER_LENGTH (ident_s), IDENTIFIER_LENGTH (ident_s),
IDENTIFIER_POINTER (ident_t), IDENTIFIER_POINTER (ident_t),
IDENTIFIER_LENGTH (ident_t)); IDENTIFIER_LENGTH (ident_t));
} }
/* Given TARGET, an identifier, and CANDIDATES, a vec of identifiers, /* Given TARGET, an identifier, and CANDIDATES, a vec of identifiers,
......
...@@ -25,7 +25,7 @@ along with GCC; see the file COPYING3. If not see ...@@ -25,7 +25,7 @@ along with GCC; see the file COPYING3. If not see
/* spellcheck-tree.c */ /* spellcheck-tree.c */
extern edit_distance_t extern edit_distance_t
levenshtein_distance (tree ident_s, tree ident_t); get_edit_distance (tree ident_s, tree ident_t);
extern tree extern tree
find_closest_identifier (tree target, const auto_vec<tree> *candidates); find_closest_identifier (tree target, const auto_vec<tree> *candidates);
......
...@@ -25,11 +25,11 @@ const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX; ...@@ -25,11 +25,11 @@ const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;
/* spellcheck.c */ /* spellcheck.c */
extern edit_distance_t extern edit_distance_t
levenshtein_distance (const char *s, int len_s, get_edit_distance (const char *s, int len_s,
const char *t, int len_t); const char *t, int len_t);
extern edit_distance_t extern edit_distance_t
levenshtein_distance (const char *s, const char *t); get_edit_distance (const char *s, const char *t);
extern const char * extern const char *
find_closest_string (const char *target, find_closest_string (const char *target,
...@@ -73,7 +73,7 @@ struct edit_distance_traits<const char *> ...@@ -73,7 +73,7 @@ struct edit_distance_traits<const char *>
This type accumulates the best possible match against GOAL_TYPE for This type accumulates the best possible match against GOAL_TYPE for
a sequence of elements of CANDIDATE_TYPE, whilst minimizing the a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
number of calls to levenshtein_distance and to number of calls to get_edit_distance and to
edit_distance_traits<T>::get_length. */ edit_distance_traits<T>::get_length. */
template <typename GOAL_TYPE, typename CANDIDATE_TYPE> template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
...@@ -126,9 +126,9 @@ class best_match ...@@ -126,9 +126,9 @@ class best_match
/* Otherwise, compute the distance and see if the candidate /* Otherwise, compute the distance and see if the candidate
has beaten the previous best value. */ has beaten the previous best value. */
edit_distance_t dist edit_distance_t dist
= levenshtein_distance (m_goal, m_goal_len, = get_edit_distance (m_goal, m_goal_len,
candidate_traits::get_string (candidate), candidate_traits::get_string (candidate),
candidate_len); candidate_len);
if (dist < m_best_distance) if (dist < m_best_distance)
{ {
m_best_distance = dist; m_best_distance = dist;
......
2018-06-12 David Malcolm <dmalcolm@redhat.com>
PR other/69968
* gcc.dg/spellcheck-transposition.c: New test.
2018-06-12 Steven G. Kargl <kargl@gcc.gnu.org> 2018-06-12 Steven G. Kargl <kargl@gcc.gnu.org>
PR fortran/44491 PR fortran/44491
......
/* PR other/69968. */
/* Anonymous struct with two groups of three near-identically named
   int fields (coord{x,y,z} and coord{x,y,z}1); 'c' is the single
   object of this type, whose members are accessed in foo.  */
struct {
int coordx, coordy, coordz;
int coordx1, coordy1, coordz1;
} c;
/* Consider the misspelling "coorzd1".
With Levenshtein distance, the misspelling has an edit distance of 2
to all 6 of the fields (e.g. via a deletion and a substitution for the
first three, and via deletion and insertion for the second three).
With Damerau-Levenshtein, the misspelling has an edit distance of 1
via transposition to "coordz1", and 2 to the other fields. */
/* Deliberately assigns to 'coorzd1', which is not a member of 'c', so
   that the compiler reports an error.  The DejaGnu error directive on
   that line expects the diagnostic to suggest 'coordz1'.  */
void foo (void)
{
c.coorzd1 = c.coordy; /* { dg-error "has no member named 'coorzd1'; did you mean 'coordz1'" } */
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment