Commit b80a188b by David Malcolm Committed by David Malcolm

spellcheck: support transpositions aka Damerau-Levenshtein (PR other/69968)

gcc/fortran/ChangeLog:
	PR other/69968
	* misc.c (gfc_closest_fuzzy_match): Update for renaming of
	levenshtein_distance to get_edit_distance.

gcc/ChangeLog:
	PR other/69968
	* spellcheck-tree.c (levenshtein_distance): Rename to...
	(get_edit_distance): ...this, and update for underlying renaming.
	* spellcheck-tree.h (levenshtein_distance): Rename to...
	(get_edit_distance): ...this.
	* spellcheck.c (levenshtein_distance): Rename to...
	(get_edit_distance): ...this.  Convert from Levenshtein distance
	to Damerau-Levenshtein distance by supporting transpositions of
	adjacent characters.  Rename "v1" to "v_next" and "v0" to
	"v_one_ago".
	(selftest::levenshtein_distance_unit_test_oneway): Rename to...
	(selftest::test_edit_distance_unit_test_oneway): ...this, and
	update for underlying renaming.
	(selftest::levenshtein_distance_unit_test): Rename to...
	(selftest::test_get_edit_distance_unit): ...this, and update for
	underlying renaming.
	(selftest::test_find_closest_string): Add example from PR 69968
	where transposition helps.
	(selftest::test_metric_conditions): Update for renaming.
	(selftest::test_metric_conditions): Likewise.
	(selftest::spellcheck_c_tests): Likewise.
	* spellcheck.h (levenshtein_distance): Rename both overloads to...
	(get_edit_distance): ...this.
	(best_match::consider): Update for renaming.

gcc/testsuite/ChangeLog:
	PR other/69968
	* gcc.dg/spellcheck-transposition.c: New test.

From-SVN: r261521
parent e3329a78
2018-06-12 David Malcolm <dmalcolm@redhat.com>
PR other/69968
* spellcheck-tree.c (levenshtein_distance): Rename to...
(get_edit_distance): ...this, and update for underlying renaming.
* spellcheck-tree.h (levenshtein_distance): Rename to...
(get_edit_distance): ...this.
* spellcheck.c (levenshtein_distance): Rename to...
(get_edit_distance): ...this. Convert from Levenshtein distance
to Damerau-Levenshtein distance by supporting transpositions of
adjacent characters. Rename "v1" to "v_next" and "v0" to
"v_one_ago".
(selftest::levenshtein_distance_unit_test_oneway): Rename to...
(selftest::test_edit_distance_unit_test_oneway): ...this, and
update for underlying renaming.
(selftest::levenshtein_distance_unit_test): Rename to...
(selftest::test_get_edit_distance_unit): ...this, and update for
underlying renaming.
(selftest::test_find_closest_string): Add example from PR 69968
where transposition helps.
(selftest::test_metric_conditions): Update for renaming.
(selftest::test_metric_conditions): Likewise.
(selftest::spellcheck_c_tests): Likewise.
* spellcheck.h (levenshtein_distance): Rename both overloads to...
(get_edit_distance): ...this.
(best_match::consider): Update for renaming.
2018-06-12 Martin Sebor <msebor@redhat.com>
PR tree-optimization/85259
......
2018-06-12 David Malcolm <dmalcolm@redhat.com>
PR other/69968
* misc.c (gfc_closest_fuzzy_match): Update for renaming of
levenshtein_distance to get_edit_distance.
2018-06-12 Steven G. Kargl <kargl@gcc.gnu.org>
PR fortran/44491
......
......@@ -286,7 +286,7 @@ get_c_kind(const char *c_kind_name, CInteropKind_t kinds_table[])
/* For a given name TYPO, determine the best candidate from CANDIDATES
perusing Levenshtein distance. Frees CANDIDATES before returning. */
using get_edit_distance. Frees CANDIDATES before returning. */
const char *
gfc_closest_fuzzy_match (const char *typo, char **candidates)
......@@ -299,7 +299,7 @@ gfc_closest_fuzzy_match (const char *typo, char **candidates)
while (cand && *cand)
{
edit_distance_t dist = levenshtein_distance (typo, tl, *cand,
edit_distance_t dist = get_edit_distance (typo, tl, *cand,
strlen (*cand));
if (dist < best_distance)
{
......
......@@ -27,18 +27,18 @@ along with GCC; see the file COPYING3. If not see
#include "selftest.h"
#include "stringpool.h"
/* Calculate Levenshtein distance between two identifiers. */
/* Calculate edit distance between two identifiers. */
edit_distance_t
levenshtein_distance (tree ident_s, tree ident_t)
get_edit_distance (tree ident_s, tree ident_t)
{
gcc_assert (TREE_CODE (ident_s) == IDENTIFIER_NODE);
gcc_assert (TREE_CODE (ident_t) == IDENTIFIER_NODE);
return levenshtein_distance (IDENTIFIER_POINTER (ident_s),
IDENTIFIER_LENGTH (ident_s),
IDENTIFIER_POINTER (ident_t),
IDENTIFIER_LENGTH (ident_t));
return get_edit_distance (IDENTIFIER_POINTER (ident_s),
IDENTIFIER_LENGTH (ident_s),
IDENTIFIER_POINTER (ident_t),
IDENTIFIER_LENGTH (ident_t));
}
/* Given TARGET, an identifier, and CANDIDATES, a vec of identifiers,
......
......@@ -25,7 +25,7 @@ along with GCC; see the file COPYING3. If not see
/* spellcheck-tree.c */
extern edit_distance_t
levenshtein_distance (tree ident_s, tree ident_t);
get_edit_distance (tree ident_s, tree ident_t);
extern tree
find_closest_identifier (tree target, const auto_vec<tree> *candidates);
......
......@@ -25,11 +25,11 @@ const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;
/* spellcheck.c */
extern edit_distance_t
levenshtein_distance (const char *s, int len_s,
const char *t, int len_t);
get_edit_distance (const char *s, int len_s,
const char *t, int len_t);
extern edit_distance_t
levenshtein_distance (const char *s, const char *t);
get_edit_distance (const char *s, const char *t);
extern const char *
find_closest_string (const char *target,
......@@ -73,7 +73,7 @@ struct edit_distance_traits<const char *>
This type accumulates the best possible match against GOAL_TYPE for
a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
number of calls to levenshtein_distance and to
number of calls to get_edit_distance and to
edit_distance_traits<T>::get_length. */
template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
......@@ -126,9 +126,9 @@ class best_match
/* Otherwise, compute the distance and see if the candidate
has beaten the previous best value. */
edit_distance_t dist
= levenshtein_distance (m_goal, m_goal_len,
candidate_traits::get_string (candidate),
candidate_len);
= get_edit_distance (m_goal, m_goal_len,
candidate_traits::get_string (candidate),
candidate_len);
if (dist < m_best_distance)
{
m_best_distance = dist;
......
2018-06-12 David Malcolm <dmalcolm@redhat.com>
PR other/69968
* gcc.dg/spellcheck-transposition.c: New test.
2018-06-12 Steven G. Kargl <kargl@gcc.gnu.org>
PR fortran/44491
......
/* PR other/69968.
   Check that a misspelled field name whose only error is a transposition
   of two adjacent characters gets the expected "did you mean" hint. */
/* Six fields with near-identical names, so that each of them is a
   candidate suggestion for the misspelling used in foo below. */
struct {
int coordx, coordy, coordz;
int coordx1, coordy1, coordz1;
} c;
/* Consider the misspelling "coorzd1".
With Levenshtein distance, the misspelling has an edit distance of 2
to all 6 of the fields (e.g. via a deletion and a substitution for the
first three, and via deletion and insertion for the second three).
With Damerau-Levenshtein, the misspelling has an edit distance of 1
via transposition to "coordz1", and 2 to the other fields. */
/* Deliberately assign to the nonexistent field "coorzd1" of c; the
   dg-error directive below states the diagnostic text that is expected,
   including the suggestion "coordz1". */
void foo (void)
{
c.coorzd1 = c.coordy; /* { dg-error "has no member named 'coorzd1'; did you mean 'coordz1'" } */
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment