/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */

#include "pathspec.h"

#include "git2/pathspec.h"
#include "git2/diff.h"
#include "buf_text.h"
#include "attr_file.h"
#include "iterator.h"
#include "repository.h"
#include "index.h"
#include "bitvec.h"
#include "diff.h"
#include "wildmatch.h"

/*
 * What is the common non-wildcard prefix for all items in the pathspec?
 *
 * Returns a detached string owned by the caller, or NULL when the
 * pathspec is NULL/empty, the common prefix cannot be computed, or the
 * prefix is empty once cut at the first unescaped wildcard.
 */
char *git_pathspec_prefix(const git_strarray *pathspec)
{
	git_buf prefix = GIT_BUF_INIT;
	const char *scan;

	if (!pathspec || !pathspec->count)
		return NULL;

	if (git_buf_text_common_prefix(&prefix, pathspec) < 0) {
		/* do not leak whatever the buffer may have allocated so far */
		git_buf_dispose(&prefix);
		return NULL;
	}

	/* diff prefix will only be leading non-wildcards */
	for (scan = prefix.ptr; *scan; ++scan) {
		/* a wildcard preceded by a backslash is escaped and does not stop us */
		if (git__iswildcard(*scan) &&
			(scan == prefix.ptr || (*(scan - 1) != '\\')))
			break;
	}
	git_buf_truncate(&prefix, scan - prefix.ptr);

	if (prefix.size <= 0) {
		git_buf_dispose(&prefix);
		return NULL;
	}

	git_buf_text_unescape(&prefix);

	return git_buf_detach(&prefix);
}

/*
 * Is there anything in the spec that needs to be filtered on?
 *
 * Returns true when the pathspec is NULL or every entry is NULL or "".
 */
bool git_pathspec_is_empty(const git_strarray *pathspec)
{
	size_t i;

	if (pathspec == NULL)
		return true;

	for (i = 0; i < pathspec->count; ++i) {
		const char *str = pathspec->strings[i];

		if (str && str[0])
			return false;
	}

	return true;
}

/*
 * Build a vector of fnmatch patterns to evaluate efficiently.
 *
 * `vspec` is zeroed first; an empty `strspec` leaves it zeroed and
 * returns 0.  Patterns for which the parser reports GIT_ENOTFOUND are
 * skipped.  On error a negative value is returned and `vspec` may hold
 * the entries inserted so far; the caller releases them with
 * git_pathspec__vfree().
 */
int git_pathspec__vinit(
	git_vector *vspec, const git_strarray *strspec, git_pool *strpool)
{
	size_t i;

	memset(vspec, 0, sizeof(*vspec));

	if (git_pathspec_is_empty(strspec))
		return 0;

	if (git_vector_init(vspec, strspec->count, NULL) < 0)
		return -1;

	for (i = 0; i < strspec->count; ++i) {
		int ret;
		const char *pattern = strspec->strings[i];
		git_attr_fnmatch *match = git__calloc(1, sizeof(git_attr_fnmatch));

		if (!match)
			return -1;

		match->flags = GIT_ATTR_FNMATCH_ALLOWSPACE | GIT_ATTR_FNMATCH_ALLOWNEG;

		ret = git_attr_fnmatch__parse(match, strpool, NULL, &pattern);
		if (ret == GIT_ENOTFOUND) {
			git__free(match);
			continue;
		} else if (ret < 0) {
			git__free(match);
			return ret;
		}

		if (git_vector_insert(vspec, match) < 0) {
			/* the vector did not take ownership, so release it here */
			git__free(match);
			return -1;
		}
	}

	return 0;
}

/* free data from the pathspec vector */
void git_pathspec__vfree(git_vector *vspec)
{
	git_vector_free_deep(vspec);
}

/*
 * Per-call matching configuration: the flags handed to wildmatch() (a
 * negative value disables wildcard matching) and the string comparison
 * functions to use (case-sensitive or case-folding).
 */
struct pathspec_match_context {
	int wildmatch_flags;
	int (*strcomp)(const char *, const char *);
	int (*strncomp)(const char *, const char *, size_t);
};

/* fill in a match context from the "no glob" and "ignore case" options */
static void pathspec_match_context_init(
	struct pathspec_match_context *ctxt,
	bool disable_fnmatch,
	bool casefold)
{
	if (disable_fnmatch)
		ctxt->wildmatch_flags = -1;
	else if (casefold)
		ctxt->wildmatch_flags = WM_CASEFOLD;
	else
		ctxt->wildmatch_flags = 0;

	if (casefold) {
		ctxt->strcomp  = git__strcasecmp;
		ctxt->strncomp = git__strncasecmp;
	} else {
		ctxt->strcomp  = git__strcmp;
		ctxt->strncomp = git__strncmp;
	}
}

/*
 * Test a single path against a single pattern.
 *
 * Returns 1 for a positive match, 0 when a negative pattern matched
 * (the path is to be excluded), and -1 when the pattern did not match.
 */
static int pathspec_match_one(
	const git_attr_fnmatch *match,
	struct pathspec_match_context *ctxt,
	const char *path)
{
	int result = (match->flags & GIT_ATTR_FNMATCH_MATCH_ALL) ? 0 : WM_NOMATCH;

	/* try an exact string comparison first */
	if (result == WM_NOMATCH)
		result = ctxt->strcomp(match->pattern, path) ? WM_NOMATCH : 0;

	/* then wildcard matching, unless it has been disabled */
	if (ctxt->wildmatch_flags >= 0 && result == WM_NOMATCH)
		result = wildmatch(match->pattern, path, ctxt->wildmatch_flags);

	/* if we didn't match, look for exact dirname prefix match */
	if (result == WM_NOMATCH &&
		(match->flags & GIT_ATTR_FNMATCH_HASWILD) == 0 &&
		ctxt->strncomp(path, match->pattern, match->length) == 0 &&
		path[match->length] == '/')
		result = 0;

	/* if we didn't match and this is a negative match, check for exact
	 * match of filename with leading '!'
	 */
	if (result == WM_NOMATCH &&
		(match->flags & GIT_ATTR_FNMATCH_NEGATIVE) != 0 &&
		*path == '!' &&
		ctxt->strncomp(path + 1, match->pattern, match->length) == 0 &&
		(!path[match->length + 1] || path[match->length + 1] == '/'))
		return 1;

	if (result == 0)
		return (match->flags & GIT_ATTR_FNMATCH_NEGATIVE) ? 0 : 1;

	return -1;
}

/*
 * Walk the pattern vector and stop at the first pattern that matches
 * (positively or negatively) either `path0` or `path1`; either path may
 * be NULL.
 *
 * `*matched_at` receives the index at which the walk stopped.  The
 * return value is the pathspec_match_one() result of the last test
 * performed, or GIT_ENOTFOUND if no test was performed.
 */
static int git_pathspec__match_at(
	size_t *matched_at,
	const git_vector *vspec,
	struct pathspec_match_context *ctxt,
	const char *path0,
	const char *path1)
{
	int result = GIT_ENOTFOUND;
	size_t i = 0;
	const git_attr_fnmatch *match;

	git_vector_foreach(vspec, i, match) {
		if (path0 && (result = pathspec_match_one(match, ctxt, path0)) >= 0)
			break;
		if (path1 && (result = pathspec_match_one(match, ctxt, path1)) >= 0)
			break;
	}

	*matched_at = i;
	return result;
}

/*
 * Match a path against the vectorized pathspec.
 *
 * A NULL or empty `vspec` matches everything.  When a pattern matched
 * (positively or negatively), the optional outputs `matched_pathspec`
 * and `matched_at` receive the pattern string and its index; otherwise
 * they are left at NULL and GIT_PATHSPEC_NOMATCH.  Returns true only
 * for a positive match.
 */
bool git_pathspec__match(
	const git_vector *vspec,
	const char *path,
	bool disable_fnmatch,
	bool casefold,
	const char **matched_pathspec,
	size_t *matched_at)
{
	int result;
	size_t pos;
	struct pathspec_match_context ctxt;

	if (matched_pathspec)
		*matched_pathspec = NULL;
	if (matched_at)
		*matched_at = GIT_PATHSPEC_NOMATCH;

	if (!vspec || !vspec->length)
		return true;

	pathspec_match_context_init(&ctxt, disable_fnmatch, casefold);

	result = git_pathspec__match_at(&pos, vspec, &ctxt, path, NULL);
	if (result >= 0) {
		if (matched_pathspec) {
			const git_attr_fnmatch *match = git_vector_get(vspec, pos);
			*matched_pathspec = match->pattern;
		}

		if (matched_at)
			*matched_at = pos;
	}

	return (result > 0);
}

/*
 * Initialize a caller-provided pathspec from an array of path strings.
 * On failure the structure is cleared and the error code is returned.
 */
int git_pathspec__init(git_pathspec *ps, const git_strarray *paths)
{
	int error = 0;

	memset(ps, 0, sizeof(*ps));

	ps->prefix = git_pathspec_prefix(paths);

	if ((error = git_pool_init(&ps->pool, 1)) < 0 ||
		(error = git_pathspec__vinit(&ps->pathspec, paths, &ps->pool)) < 0)
		git_pathspec__clear(ps);

	return error;
}

/* release everything held by the pathspec and zero the structure */
void git_pathspec__clear(git_pathspec *ps)
{
	git__free(ps->prefix);
	git_pathspec__vfree(&ps->pathspec);
	git_pool_clear(&ps->pool);
	memset(ps, 0, sizeof(*ps));
}

/*
 * Allocate and initialize a new reference-counted pathspec.  On success
 * `*out` holds one reference, to be released with git_pathspec_free().
 */
int git_pathspec_new(git_pathspec **out, const git_strarray *pathspec)
{
	int error = 0;
	git_pathspec *ps = git__malloc(sizeof(git_pathspec));
	GIT_ERROR_CHECK_ALLOC(ps);

	if ((error = git_pathspec__init(ps, pathspec)) < 0) {
		git__free(ps);
		return error;
	}

	GIT_REFCOUNT_INC(ps);
	*out = ps;
	return 0;
}

/* destructor handed to GIT_REFCOUNT_DEC by git_pathspec_free() */
static void pathspec_free(git_pathspec *ps)
{
	git_pathspec__clear(ps);
	git__free(ps);
}

/* drop one reference to the pathspec; NULL is accepted and ignored */
void git_pathspec_free(git_pathspec *ps)
{
	if (!ps)
		return;
	GIT_REFCOUNT_DEC(ps, pathspec_free);
}

/*
 * Test a single path against a compiled pathspec.  `flags` may carry
 * GIT_PATHSPEC_NO_GLOB and GIT_PATHSPEC_IGNORE_CASE.  Returns non-zero
 * on a match.
 */
int git_pathspec_matches_path(
	const git_pathspec *ps, uint32_t flags, const char *path)
{
	bool no_fnmatch = (flags & GIT_PATHSPEC_NO_GLOB) != 0;
	bool casefold = (flags & GIT_PATHSPEC_IGNORE_CASE) != 0;

	assert(ps && path);

	return (0 != git_pathspec__match(
		&ps->pathspec, path, no_fnmatch, casefold, NULL, NULL));
}

/* release a match list along with its reference on the pathspec */
static void pathspec_match_free(git_pathspec_match_list *m)
{
	if (!m)
		return;

	git_pathspec_free(m->pathspec);
	m->pathspec = NULL;

	git_array_clear(m->matches);
	git_array_clear(m->failures);
	git_pool_clear(&m->pool);
	git__free(m);
}

/*
 * Allocate an empty match list bound to `ps`, taking a new reference on
 * it.  Returns NULL on allocation failure.
 */
static git_pathspec_match_list *pathspec_match_alloc(
	git_pathspec *ps, int datatype)
{
	git_pathspec_match_list *m = git__calloc(1, sizeof(git_pathspec_match_list));
	if (!m)
		return NULL;

	if (git_pool_init(&m->pool, 1) < 0) {
		/* nothing else owns `m` yet, so free it rather than leak it */
		git__free(m);
		return NULL;
	}

	/* need to keep reference to pathspec and increment refcount because
	 * failures array stores pointers to the pattern strings of the
	 * pathspec that had no matches
	 */
	GIT_REFCOUNT_INC(ps);
	m->pathspec = ps;
	m->datatype = datatype;

	return m;
}

/* mark pattern `pos` as used; returns 1 if it was newly marked, else 0 */
GIT_INLINE(size_t) pathspec_mark_pattern(git_bitvec *used, size_t pos)
{
	if (!git_bitvec_get(used, pos)) {
		git_bitvec_set(used, pos, true);
		return 1;
	}

	return 0;
}

/*
 * Starting at index `start`, mark every not-yet-used pattern that
 * positively matches `path0` or `path1`.  Returns how many patterns
 * were newly marked.
 */
static size_t pathspec_mark_remaining(
	git_bitvec *used,
	git_vector *patterns,
	struct pathspec_match_context *ctxt,
	size_t start,
	const char *path0,
	const char *path1)
{
	size_t count = 0;

	/* no point in testing the very same string twice */
	if (path1 == path0)
		path1 = NULL;

	for (; start < patterns->length; ++start) {
		const git_attr_fnmatch *pat = git_vector_get(patterns, start);

		if (git_bitvec_get(used, start))
			continue;

		if (path0 && pathspec_match_one(pat, ctxt, path0) > 0)
			count += pathspec_mark_pattern(used, start);
		else if (path1 && pathspec_match_one(pat, ctxt, path1) > 0)
			count += pathspec_mark_pattern(used, start);
	}

	return count;
}

/*
 * Append a pool-allocated copy of every pattern not marked in `used`
 * to the `failures` array.  Returns 0 on success, -1 on allocation
 * failure.
 */
static int pathspec_build_failure_array(
	git_pathspec_string_array_t *failures,
	git_vector *patterns,
	git_bitvec *used,
	git_pool *pool)
{
	size_t pos;
	char **failed;
	const git_attr_fnmatch *pat;

	for (pos = 0; pos < patterns->length; ++pos) {
		if (git_bitvec_get(used, pos))
			continue;

		if ((failed = git_array_alloc(*failures)) == NULL)
			return -1;

		pat = git_vector_get(patterns, pos);

		if ((*failed = git_pool_strdup(pool, pat->pattern)) == NULL)
			return -1;
	}

	return 0;
}

/*
 * Shared implementation of the workdir/index/tree matchers: advance
 * `iter` over its entries and record the paths that match `ps`.
 *
 * `out` may be NULL, in which case no list is built.  On error any
 * partially built list is freed and `*out` is set to NULL.
 */
static int pathspec_match_from_iterator(
	git_pathspec_match_list **out,
	git_iterator *iter,
	uint32_t flags,
	git_pathspec *ps)
{
	int error = 0;
	git_pathspec_match_list *m = NULL;
	const git_index_entry *entry = NULL;
	struct pathspec_match_context ctxt;
	git_vector *patterns = &ps->pathspec;
	bool find_failures = out && (flags & GIT_PATHSPEC_FIND_FAILURES) != 0;
	bool failures_only = !out || (flags & GIT_PATHSPEC_FAILURES_ONLY) != 0;
	size_t pos, used_ct = 0, found_files = 0;
	git_index *index = NULL;
	git_bitvec used_patterns;
	char **file;

	if (git_bitvec_init(&used_patterns, patterns->length) < 0)
		return -1;

	if (out) {
		*out = m = pathspec_match_alloc(ps, PATHSPEC_DATATYPE_STRINGS);
		if (!m) {
			/* leave through `done` so the bit vector is released */
			error = -1;
			goto done;
		}
	}

	if ((error = git_iterator_reset_range(iter, ps->prefix, ps->prefix)) < 0)
		goto done;

	/* the index is only consulted when iterating the working directory */
	if (git_iterator_type(iter) == GIT_ITERATOR_WORKDIR &&
		(error = git_repository_index__weakptr(
			&index, git_iterator_owner(iter))) < 0)
		goto done;

	pathspec_match_context_init(
		&ctxt, (flags & GIT_PATHSPEC_NO_GLOB) != 0,
		git_iterator_ignore_case(iter));

	while (!(error = git_iterator_advance(&entry, iter))) {
		/* search for match with entry->path */
		int result = git_pathspec__match_at(
			&pos, patterns, &ctxt, entry->path, NULL);

		/* no matches for this path */
		if (result < 0)
			continue;

		/* if result was a negative pattern match, then don't list file */
		if (!result) {
			used_ct += pathspec_mark_pattern(&used_patterns, pos);
			continue;
		}

		/* check if path is ignored and untracked */
		if (index != NULL &&
			git_iterator_current_is_ignored(iter) &&
			git_index__find_pos(NULL, index, entry->path, 0, GIT_INDEX_STAGE_ANY) < 0)
			continue;

		/* mark the matched pattern as used */
		used_ct += pathspec_mark_pattern(&used_patterns, pos);
		++found_files;

		/* if find_failures is on, check if any later patterns also match */
		if (find_failures && used_ct < patterns->length)
			used_ct += pathspec_mark_remaining(
				&used_patterns, patterns, &ctxt, pos + 1, entry->path, NULL);

		/* if only looking at failures, exit early or just continue */
		if (failures_only || !out) {
			if (used_ct == patterns->length)
				break;
			continue;
		}

		/* insert matched path into matches array */
		if ((file = (char **)git_array_alloc(m->matches)) == NULL ||
			(*file = git_pool_strdup(&m->pool, entry->path)) == NULL) {
			error = -1;
			goto done;
		}
	}

	/* GIT_ITEROVER just means the iterator ran out of entries */
	if (error < 0 && error != GIT_ITEROVER)
		goto done;
	error = 0;

	/* insert patterns that had no matches into failures array */
	if (find_failures && used_ct < patterns->length &&
		(error = pathspec_build_failure_array(
			&m->failures, patterns, &used_patterns, &m->pool)) < 0)
		goto done;

	/* if every pattern failed to match, then we have failed */
	if ((flags & GIT_PATHSPEC_NO_MATCH_ERROR) != 0 && !found_files) {
		git_error_set(GIT_ERROR_INVALID, "no matching files were found");
		error = GIT_ENOTFOUND;
	}

done:
	git_bitvec_free(&used_patterns);

	if (error < 0) {
		pathspec_match_free(m);
		if (out) *out = NULL;
	}

	return error;
}

/* translate pathspec case-sensitivity flags into iterator flags */
static git_iterator_flag_t pathspec_match_iter_flags(uint32_t flags)
{
	git_iterator_flag_t f = 0;

	if ((flags & GIT_PATHSPEC_IGNORE_CASE) != 0)
		f |= GIT_ITERATOR_IGNORE_CASE;
	else if ((flags & GIT_PATHSPEC_USE_CASE) != 0)
		f |= GIT_ITERATOR_DONT_IGNORE_CASE;

	return f;
}

/* match the pathspec against the working directory of `repo` */
int git_pathspec_match_workdir(
	git_pathspec_match_list **out,
	git_repository *repo,
	uint32_t flags,
	git_pathspec *ps)
{
	git_iterator *iter;
	git_iterator_options iter_opts = GIT_ITERATOR_OPTIONS_INIT;
	int error = 0;

	assert(repo);

	iter_opts.flags = pathspec_match_iter_flags(flags);

	if (!(error = git_iterator_for_workdir(&iter, repo, NULL, NULL, &iter_opts))) {
		error = pathspec_match_from_iterator(out, iter, flags, ps);
		git_iterator_free(iter);
	}

	return error;
}

/* match the pathspec against the entries of `index` */
int git_pathspec_match_index(
	git_pathspec_match_list **out,
	git_index *index,
	uint32_t flags,
	git_pathspec *ps)
{
	git_iterator *iter;
	git_iterator_options iter_opts = GIT_ITERATOR_OPTIONS_INIT;
	int error = 0;

	assert(index);

	iter_opts.flags = pathspec_match_iter_flags(flags);

	if (!(error = git_iterator_for_index(&iter, git_index_owner(index), index, &iter_opts))) {
		error = pathspec_match_from_iterator(out, iter, flags, ps);
		git_iterator_free(iter);
	}

	return error;
}

/* match the pathspec against the entries of `tree` */
int git_pathspec_match_tree(
	git_pathspec_match_list **out,
	git_tree *tree,
	uint32_t flags,
	git_pathspec *ps)
{
	git_iterator *iter;
	git_iterator_options iter_opts = GIT_ITERATOR_OPTIONS_INIT;
	int error = 0;

	assert(tree);

	iter_opts.flags = pathspec_match_iter_flags(flags);

	if (!(error = git_iterator_for_tree(&iter, tree, &iter_opts))) {
		error = pathspec_match_from_iterator(out, iter, flags, ps);
		git_iterator_free(iter);
	}

	return error;
}

/*
 * Match the pathspec against the deltas of `diff`, testing both the old
 * and the new file path of each delta.
 *
 * The match list stores pointers to the deltas themselves rather than
 * copies.  `out` may be NULL, in which case no list is built.  On error
 * any partially built list is freed and `*out` is set to NULL.
 */
int git_pathspec_match_diff(
	git_pathspec_match_list **out,
	git_diff *diff,
	uint32_t flags,
	git_pathspec *ps)
{
	int error = 0;
	git_pathspec_match_list *m = NULL;
	struct pathspec_match_context ctxt;
	git_vector *patterns = &ps->pathspec;
	bool find_failures = out && (flags & GIT_PATHSPEC_FIND_FAILURES) != 0;
	bool failures_only = !out || (flags & GIT_PATHSPEC_FAILURES_ONLY) != 0;
	size_t i, pos, used_ct = 0, found_deltas = 0;
	const git_diff_delta *delta, **match;
	git_bitvec used_patterns;

	assert(diff);

	if (git_bitvec_init(&used_patterns, patterns->length) < 0)
		return -1;

	if (out) {
		*out = m = pathspec_match_alloc(ps, PATHSPEC_DATATYPE_DIFF);
		if (!m) {
			/* leave through `done` so the bit vector is released */
			error = -1;
			goto done;
		}
	}

	pathspec_match_context_init(
		&ctxt, (flags & GIT_PATHSPEC_NO_GLOB) != 0,
		git_diff_is_sorted_icase(diff));

	git_vector_foreach(&diff->deltas, i, delta) {
		/* search for match with delta */
		int result = git_pathspec__match_at(
			&pos, patterns, &ctxt, delta->old_file.path, delta->new_file.path);

		/* no matches for this path */
		if (result < 0)
			continue;

		/* mark the matched pattern as used */
		used_ct += pathspec_mark_pattern(&used_patterns, pos);

		/* if result was a negative pattern match, then don't list file */
		if (!result)
			continue;

		++found_deltas;

		/* if find_failures is on, check if any later patterns also match */
		if (find_failures && used_ct < patterns->length)
			used_ct += pathspec_mark_remaining(
				&used_patterns, patterns, &ctxt, pos + 1,
				delta->old_file.path, delta->new_file.path);

		/* if only looking at failures, exit early or just continue */
		if (failures_only || !out) {
			if (used_ct == patterns->length)
				break;
			continue;
		}

		/* insert matched delta into matches array */
		if (!(match = (const git_diff_delta **)git_array_alloc(m->matches))) {
			error = -1;
			goto done;
		} else {
			*match = delta;
		}
	}

	/* insert patterns that had no matches into failures array */
	if (find_failures && used_ct < patterns->length &&
		(error = pathspec_build_failure_array(
			&m->failures, patterns, &used_patterns, &m->pool)) < 0)
		goto done;

	/* if every pattern failed to match, then we have failed */
	if ((flags & GIT_PATHSPEC_NO_MATCH_ERROR) != 0 && !found_deltas) {
		git_error_set(GIT_ERROR_INVALID, "no matching deltas were found");
		error = GIT_ENOTFOUND;
	}

done:
	git_bitvec_free(&used_patterns);

	if (error < 0) {
		pathspec_match_free(m);
		if (out) *out = NULL;
	}

	return error;
}

/* free a match list; NULL is accepted and ignored */
void git_pathspec_match_list_free(git_pathspec_match_list *m)
{
	if (m)
		pathspec_match_free(m);
}

/* number of matched entries in the list (0 for a NULL list) */
size_t git_pathspec_match_list_entrycount(
	const git_pathspec_match_list *m)
{
	return m ? git_array_size(m->matches) : 0;
}

/*
 * Matched path at `pos`, or NULL if the list is NULL, does not hold
 * strings, or `pos` is out of range.
 */
const char *git_pathspec_match_list_entry(
	const git_pathspec_match_list *m, size_t pos)
{
	if (!m || m->datatype != PATHSPEC_DATATYPE_STRINGS ||
		!git_array_valid_index(m->matches, pos))
		return NULL;

	return *((const char **)git_array_get(m->matches, pos));
}

/*
 * Matched diff delta at `pos`, or NULL if the list is NULL, does not
 * hold deltas, or `pos` is out of range.
 */
const git_diff_delta *git_pathspec_match_list_diff_entry(
	const git_pathspec_match_list *m, size_t pos)
{
	if (!m || m->datatype != PATHSPEC_DATATYPE_DIFF ||
		!git_array_valid_index(m->matches, pos))
		return NULL;

	return *((const git_diff_delta **)git_array_get(m->matches, pos));
}

/* number of patterns recorded as having no match (0 for a NULL list) */
size_t git_pathspec_match_list_failed_entrycount(
	const git_pathspec_match_list *m)
{
	return m ? git_array_size(m->failures) : 0;
}

/* failed pattern string at `pos`, or NULL if it cannot be retrieved */
const char * git_pathspec_match_list_failed_entry(
	const git_pathspec_match_list *m, size_t pos)
{
	char **entry = m ? git_array_get(m->failures, pos) : NULL;

	return entry ? *entry : NULL;
}