#include "common.h" #include "repository.h" #include "filebuf.h" #include "attr_file.h" #include "attrcache.h" #include "git2/blob.h" #include "git2/tree.h" #include "index.h" #include <ctype.h> static void attr_file_free(git_attr_file *file) { bool unlock = !git_mutex_lock(&file->lock); git_attr_file__clear_rules(file, false); git_pool_clear(&file->pool); if (unlock) git_mutex_unlock(&file->lock); git_mutex_free(&file->lock); git__memzero(file, sizeof(*file)); git__free(file); } int git_attr_file__new( git_attr_file **out, git_attr_file_entry *entry, git_attr_file_source source) { git_attr_file *attrs = git__calloc(1, sizeof(git_attr_file)); GITERR_CHECK_ALLOC(attrs); if (git_mutex_init(&attrs->lock) < 0) { giterr_set(GITERR_OS, "Failed to initialize lock"); git__free(attrs); return -1; } if (git_pool_init(&attrs->pool, 1, 0) < 0) { attr_file_free(attrs); return -1; } GIT_REFCOUNT_INC(attrs); attrs->entry = entry; attrs->source = source; *out = attrs; return 0; } int git_attr_file__clear_rules(git_attr_file *file, bool need_lock) { unsigned int i; git_attr_rule *rule; if (need_lock && git_mutex_lock(&file->lock) < 0) { giterr_set(GITERR_OS, "Failed to lock attribute file"); return -1; } git_vector_foreach(&file->rules, i, rule) git_attr_rule__free(rule); git_vector_free(&file->rules); if (need_lock) git_mutex_unlock(&file->lock); return 0; } void git_attr_file__free(git_attr_file *file) { if (!file) return; GIT_REFCOUNT_DEC(file, attr_file_free); } static int attr_file_oid_from_index( git_oid *oid, git_repository *repo, const char *path) { int error; git_index *idx; size_t pos; const git_index_entry *entry; if ((error = git_repository_index__weakptr(&idx, repo)) < 0 || (error = git_index__find_pos(&pos, idx, path, 0, 0)) < 0) return error; if (!(entry = git_index_get_byindex(idx, pos))) return GIT_ENOTFOUND; *oid = entry->id; return 0; } int git_attr_file__load( git_attr_file **out, git_repository *repo, git_attr_session *attr_session, git_attr_file_entry *entry, git_attr_file_source source, git_attr_file_parser parser) { int error = 0; git_blob *blob = NULL; git_buf content = GIT_BUF_INIT; git_attr_file *file; struct stat st; bool nonexistent = false; *out = NULL; switch (source) { case GIT_ATTR_FILE__IN_MEMORY: /* in-memory attribute file doesn't need data */ break; case GIT_ATTR_FILE__FROM_INDEX: { git_oid id; if ((error = attr_file_oid_from_index(&id, repo, entry->path)) < 0 || (error = git_blob_lookup(&blob, repo, &id)) < 0) return error; /* Do not assume that data straight from the ODB is NULL-terminated; * copy the contents of a file to a buffer to work on */ git_buf_put(&content, git_blob_rawcontent(blob), git_blob_rawsize(blob)); break; } case GIT_ATTR_FILE__FROM_FILE: { int fd; /* For open or read errors, pretend that we got ENOTFOUND. */ /* TODO: issue warning when warning API is available */ if (p_stat(entry->fullpath, &st) < 0 || S_ISDIR(st.st_mode) || (fd = git_futils_open_ro(entry->fullpath)) < 0 || (error = git_futils_readbuffer_fd(&content, fd, (size_t)st.st_size)) < 0) nonexistent = true; else p_close(fd); break; } default: giterr_set(GITERR_INVALID, "Unknown file source %d", source); return -1; } if ((error = git_attr_file__new(&file, entry, source)) < 0) goto cleanup; /* store the key of the attr_reader; don't bother with cache * invalidation during the same attr reader session. */ if (attr_session) file->session_key = attr_session->key; if (parser && (error = parser(repo, file, git_buf_cstr(&content))) < 0) { git_attr_file__free(file); goto cleanup; } /* write cache breakers */ if (nonexistent) file->nonexistent = 1; else if (source == GIT_ATTR_FILE__FROM_INDEX) git_oid_cpy(&file->cache_data.oid, git_blob_id(blob)); else if (source == GIT_ATTR_FILE__FROM_FILE) git_futils_filestamp_set_from_stat(&file->cache_data.stamp, &st); /* else always cacheable */ *out = file; cleanup: git_blob_free(blob); git_buf_free(&content); return error; } int git_attr_file__out_of_date( git_repository *repo, git_attr_session *attr_session, git_attr_file *file) { if (!file) return 1; /* we are never out of date if we just created this data in the same * attr_session; otherwise, nonexistent files must be invalidated */ if (attr_session && attr_session->key == file->session_key) return 0; else if (file->nonexistent) return 1; switch (file->source) { case GIT_ATTR_FILE__IN_MEMORY: return 0; case GIT_ATTR_FILE__FROM_FILE: return git_futils_filestamp_check( &file->cache_data.stamp, file->entry->fullpath); case GIT_ATTR_FILE__FROM_INDEX: { int error; git_oid id; if ((error = attr_file_oid_from_index( &id, repo, file->entry->path)) < 0) return error; return (git_oid__cmp(&file->cache_data.oid, &id) != 0); } default: giterr_set(GITERR_INVALID, "Invalid file type %d", file->source); return -1; } } static int sort_by_hash_and_name(const void *a_raw, const void *b_raw); static void git_attr_rule__clear(git_attr_rule *rule); static bool parse_optimized_patterns( git_attr_fnmatch *spec, git_pool *pool, const char *pattern); int git_attr_file__parse_buffer( git_repository *repo, git_attr_file *attrs, const char *data) { int error = 0; const char *scan = data, *context = NULL; git_attr_rule *rule = NULL; /* if subdir file path, convert context for file paths */ if (attrs->entry && git_path_root(attrs->entry->path) < 0 && !git__suffixcmp(attrs->entry->path, "/" GIT_ATTR_FILE)) context = attrs->entry->path; if (git_mutex_lock(&attrs->lock) < 0) { giterr_set(GITERR_OS, "Failed to lock attribute file"); return -1; } while (!error && *scan) { /* allocate rule if needed */ if (!rule && !(rule = git__calloc(1, sizeof(*rule)))) { error = -1; break; } rule->match.flags = GIT_ATTR_FNMATCH_ALLOWNEG | GIT_ATTR_FNMATCH_ALLOWMACRO; /* parse the next "pattern attr attr attr" line */ if (!(error = git_attr_fnmatch__parse( &rule->match, &attrs->pool, context, &scan)) && !(error = git_attr_assignment__parse( repo, &attrs->pool, &rule->assigns, &scan))) { if (rule->match.flags & GIT_ATTR_FNMATCH_MACRO) /* TODO: warning if macro found in file below repo root */ error = git_attr_cache__insert_macro(repo, rule); else error = git_vector_insert(&attrs->rules, rule); } /* if the rule wasn't a pattern, on to the next */ if (error < 0) { git_attr_rule__clear(rule); /* reset rule contents */ if (error == GIT_ENOTFOUND) error = 0; } else { rule = NULL; /* vector now "owns" the rule */ } } git_mutex_unlock(&attrs->lock); git_attr_rule__free(rule); return error; } uint32_t git_attr_file__name_hash(const char *name) { uint32_t h = 5381; int c; assert(name); while ((c = (int)*name++) != 0) h = ((h << 5) + h) + c; return h; } int git_attr_file__lookup_one( git_attr_file *file, git_attr_path *path, const char *attr, const char **value) { size_t i; git_attr_name name; git_attr_rule *rule; *value = NULL; name.name = attr; name.name_hash = git_attr_file__name_hash(attr); git_attr_file__foreach_matching_rule(file, path, i, rule) { size_t pos; if (!git_vector_bsearch(&pos, &rule->assigns, &name)) { *value = ((git_attr_assignment *) git_vector_get(&rule->assigns, pos))->value; break; } } return 0; } int git_attr_file__load_standalone(git_attr_file **out, const char *path) { int error; git_attr_file *file; git_buf content = GIT_BUF_INIT; error = git_attr_file__new(&file, NULL, GIT_ATTR_FILE__FROM_FILE); if (error < 0) return error; error = git_attr_cache__alloc_file_entry( &file->entry, NULL, path, &file->pool); if (error < 0) { git_attr_file__free(file); return error; } /* because the cache entry is allocated from the file's own pool, we * don't have to free it - freeing file+pool will free cache entry, too. */ if (!(error = git_futils_readbuffer(&content, path))) { error = git_attr_file__parse_buffer(NULL, file, content.ptr); git_buf_free(&content); } if (error < 0) git_attr_file__free(file); else *out = file; return error; } bool git_attr_fnmatch__match( git_attr_fnmatch *match, git_attr_path *path) { const char *relpath = path->path; const char *filename; int flags = 0; /* * If the rule was generated in a subdirectory, we must only * use it for paths inside that directory. We can thus return * a non-match if the prefixes don't match. */ if (match->containing_dir) { if (match->flags & GIT_ATTR_FNMATCH_ICASE) { if (git__strncasecmp(path->path, match->containing_dir, match->containing_dir_length)) return 0; } else { if (git__prefixcmp(path->path, match->containing_dir)) return 0; } relpath += match->containing_dir_length; } if (match->flags & GIT_ATTR_FNMATCH_ICASE) flags |= FNM_CASEFOLD; if (match->flags & GIT_ATTR_FNMATCH_LEADINGDIR) flags |= FNM_LEADING_DIR; if (match->flags & GIT_ATTR_FNMATCH_FULLPATH) { filename = relpath; flags |= FNM_PATHNAME; } else { filename = path->basename; if (path->is_dir) flags |= FNM_LEADING_DIR; } if ((match->flags & GIT_ATTR_FNMATCH_DIRECTORY) && !path->is_dir) { bool samename; /* for attribute checks or root ignore checks, fail match */ if (!(match->flags & GIT_ATTR_FNMATCH_IGNORE) || path->basename == path->path) return false; flags |= FNM_LEADING_DIR; /* fail match if this is a file with same name as ignored folder */ samename = (match->flags & GIT_ATTR_FNMATCH_ICASE) ? !strcasecmp(match->pattern, relpath) : !strcmp(match->pattern, relpath); if (samename) return false; return (p_fnmatch(match->pattern, relpath, flags) != FNM_NOMATCH); } /* if path is a directory prefix of a negated pattern, then match */ if ((match->flags & GIT_ATTR_FNMATCH_NEGATIVE) && path->is_dir) { size_t pathlen = strlen(relpath); bool prefixed = (pathlen <= match->length) && ((match->flags & GIT_ATTR_FNMATCH_ICASE) ? !strncasecmp(match->pattern, relpath, pathlen) : !strncmp(match->pattern, relpath, pathlen)); if (prefixed && git_path_at_end_of_segment(&match->pattern[pathlen])) return true; } return (p_fnmatch(match->pattern, filename, flags) != FNM_NOMATCH); } bool git_attr_rule__match( git_attr_rule *rule, git_attr_path *path) { bool matched = git_attr_fnmatch__match(&rule->match, path); if (rule->match.flags & GIT_ATTR_FNMATCH_NEGATIVE) matched = !matched; return matched; } git_attr_assignment *git_attr_rule__lookup_assignment( git_attr_rule *rule, const char *name) { size_t pos; git_attr_name key; key.name = name; key.name_hash = git_attr_file__name_hash(name); if (git_vector_bsearch(&pos, &rule->assigns, &key)) return NULL; return git_vector_get(&rule->assigns, pos); } int git_attr_path__init( git_attr_path *info, const char *path, const char *base, git_dir_flag dir_flag) { ssize_t root; /* build full path as best we can */ git_buf_init(&info->full, 0); if (git_path_join_unrooted(&info->full, path, base, &root) < 0) return -1; info->path = info->full.ptr + root; /* remove trailing slashes */ while (info->full.size > 0) { if (info->full.ptr[info->full.size - 1] != '/') break; info->full.size--; } info->full.ptr[info->full.size] = '\0'; /* skip leading slashes in path */ while (*info->path == '/') info->path++; /* find trailing basename component */ info->basename = strrchr(info->path, '/'); if (info->basename) info->basename++; if (!info->basename || !*info->basename) info->basename = info->path; switch (dir_flag) { case GIT_DIR_FLAG_FALSE: info->is_dir = 0; break; case GIT_DIR_FLAG_TRUE: info->is_dir = 1; break; case GIT_DIR_FLAG_UNKNOWN: default: info->is_dir = (int)git_path_isdir(info->full.ptr); break; } return 0; } void git_attr_path__free(git_attr_path *info) { git_buf_free(&info->full); info->path = NULL; info->basename = NULL; } /* * From gitattributes(5): * * Patterns have the following format: * * - A blank line matches no files, so it can serve as a separator for * readability. * * - A line starting with # serves as a comment. * * - An optional prefix ! which negates the pattern; any matching file * excluded by a previous pattern will become included again. If a negated * pattern matches, this will override lower precedence patterns sources. * * - If the pattern ends with a slash, it is removed for the purpose of the * following description, but it would only find a match with a directory. In * other words, foo/ will match a directory foo and paths underneath it, but * will not match a regular file or a symbolic link foo (this is consistent * with the way how pathspec works in general in git). * * - If the pattern does not contain a slash /, git treats it as a shell glob * pattern and checks for a match against the pathname without leading * directories. * * - Otherwise, git treats the pattern as a shell glob suitable for consumption * by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the pattern will * not match a / in the pathname. For example, "Documentation/\*.html" matches * "Documentation/git.html" but not "Documentation/ppc/ppc.html". A leading * slash matches the beginning of the pathname; for example, "/\*.c" matches * "cat-file.c" but not "mozilla-sha1/sha1.c". */ /* * This will return 0 if the spec was filled out, * GIT_ENOTFOUND if the fnmatch does not require matching, or * another error code there was an actual problem. */ int git_attr_fnmatch__parse( git_attr_fnmatch *spec, git_pool *pool, const char *context, const char **base) { const char *pattern, *scan; int slash_count, allow_space; assert(spec && base && *base); if (parse_optimized_patterns(spec, pool, *base)) return 0; spec->flags = (spec->flags & GIT_ATTR_FNMATCH__INCOMING); allow_space = ((spec->flags & GIT_ATTR_FNMATCH_ALLOWSPACE) != 0); pattern = *base; while (git__isspace(*pattern)) pattern++; if (!*pattern || *pattern == '#') { *base = git__next_line(pattern); return GIT_ENOTFOUND; } if (*pattern == '[' && (spec->flags & GIT_ATTR_FNMATCH_ALLOWMACRO) != 0) { if (strncmp(pattern, "[attr]", 6) == 0) { spec->flags = spec->flags | GIT_ATTR_FNMATCH_MACRO; pattern += 6; } /* else a character range like [a-e]* which is accepted */ } if (*pattern == '!' && (spec->flags & GIT_ATTR_FNMATCH_ALLOWNEG) != 0) { spec->flags = spec->flags | GIT_ATTR_FNMATCH_NEGATIVE | GIT_ATTR_FNMATCH_LEADINGDIR; pattern++; } slash_count = 0; for (scan = pattern; *scan != '\0'; ++scan) { /* scan until (non-escaped) white space */ if (git__isspace(*scan) && *(scan - 1) != '\\') { if (!allow_space || (*scan != ' ' && *scan != '\t' && *scan != '\r')) break; } if (*scan == '/') { spec->flags = spec->flags | GIT_ATTR_FNMATCH_FULLPATH; slash_count++; if (pattern == scan) pattern++; } /* remember if we see an unescaped wildcard in pattern */ else if (git__iswildcard(*scan) && (scan == pattern || (*(scan - 1) != '\\'))) spec->flags = spec->flags | GIT_ATTR_FNMATCH_HASWILD; } *base = scan; if ((spec->length = scan - pattern) == 0) return GIT_ENOTFOUND; /* * Remove one trailing \r in case this is a CRLF delimited * file, in the case of Icon\r\r\n, we still leave the first * \r there to match against. */ if (pattern[spec->length - 1] == '\r') if (--spec->length == 0) return GIT_ENOTFOUND; if (pattern[spec->length - 1] == '/') { spec->length--; spec->flags = spec->flags | GIT_ATTR_FNMATCH_DIRECTORY; if (--slash_count <= 0) spec->flags = spec->flags & ~GIT_ATTR_FNMATCH_FULLPATH; } if ((spec->flags & GIT_ATTR_FNMATCH_NOLEADINGDIR) == 0 && spec->length >= 2 && pattern[spec->length - 1] == '*' && pattern[spec->length - 2] == '/') { spec->length -= 2; spec->flags = spec->flags | GIT_ATTR_FNMATCH_LEADINGDIR; /* leave FULLPATH match on, however */ } if (context) { char *slash = strrchr(context, '/'); size_t len; if (slash) { /* include the slash for easier matching */ len = slash - context + 1; spec->containing_dir = git_pool_strndup(pool, context, len); spec->containing_dir_length = len; } } spec->pattern = git_pool_strndup(pool, pattern, spec->length); if (!spec->pattern) { *base = git__next_line(pattern); return -1; } else { /* strip '\' that might have be used for internal whitespace */ spec->length = git__unescape(spec->pattern); /* TODO: convert remaining '\' into '/' for POSIX ??? */ } return 0; } static bool parse_optimized_patterns( git_attr_fnmatch *spec, git_pool *pool, const char *pattern) { if (!pattern[1] && (pattern[0] == '*' || pattern[0] == '.')) { spec->flags = GIT_ATTR_FNMATCH_MATCH_ALL; spec->pattern = git_pool_strndup(pool, pattern, 1); spec->length = 1; return true; } return false; } static int sort_by_hash_and_name(const void *a_raw, const void *b_raw) { const git_attr_name *a = a_raw; const git_attr_name *b = b_raw; if (b->name_hash < a->name_hash) return 1; else if (b->name_hash > a->name_hash) return -1; else return strcmp(b->name, a->name); } static void git_attr_assignment__free(git_attr_assignment *assign) { /* name and value are stored in a git_pool associated with the * git_attr_file, so they do not need to be freed here */ assign->name = NULL; assign->value = NULL; git__free(assign); } static int merge_assignments(void **old_raw, void *new_raw) { git_attr_assignment **old = (git_attr_assignment **)old_raw; git_attr_assignment *new = (git_attr_assignment *)new_raw; GIT_REFCOUNT_DEC(*old, git_attr_assignment__free); *old = new; return GIT_EEXISTS; } int git_attr_assignment__parse( git_repository *repo, git_pool *pool, git_vector *assigns, const char **base) { int error; const char *scan = *base; git_attr_assignment *assign = NULL; assert(assigns && !assigns->length); git_vector_set_cmp(assigns, sort_by_hash_and_name); while (*scan && *scan != '\n') { const char *name_start, *value_start; /* skip leading blanks */ while (git__isspace(*scan) && *scan != '\n') scan++; /* allocate assign if needed */ if (!assign) { assign = git__calloc(1, sizeof(git_attr_assignment)); GITERR_CHECK_ALLOC(assign); GIT_REFCOUNT_INC(assign); } assign->name_hash = 5381; assign->value = git_attr__true; /* look for magic name prefixes */ if (*scan == '-') { assign->value = git_attr__false; scan++; } else if (*scan == '!') { assign->value = git_attr__unset; /* explicit unspecified state */ scan++; } else if (*scan == '#') /* comment rest of line */ break; /* find the name */ name_start = scan; while (*scan && !git__isspace(*scan) && *scan != '=') { assign->name_hash = ((assign->name_hash << 5) + assign->name_hash) + *scan; scan++; } if (scan == name_start) { /* must have found lone prefix (" - ") or leading = ("=foo") * or end of buffer -- advance until whitespace and continue */ while (*scan && !git__isspace(*scan)) scan++; continue; } /* allocate permanent storage for name */ assign->name = git_pool_strndup(pool, name_start, scan - name_start); GITERR_CHECK_ALLOC(assign->name); /* if there is an equals sign, find the value */ if (*scan == '=') { for (value_start = ++scan; *scan && !git__isspace(*scan); ++scan); /* if we found a value, allocate permanent storage for it */ if (scan > value_start) { assign->value = git_pool_strndup(pool, value_start, scan - value_start); GITERR_CHECK_ALLOC(assign->value); } } /* expand macros (if given a repo with a macro cache) */ if (repo != NULL && assign->value == git_attr__true) { git_attr_rule *macro = git_attr_cache__lookup_macro(repo, assign->name); if (macro != NULL) { unsigned int i; git_attr_assignment *massign; git_vector_foreach(¯o->assigns, i, massign) { GIT_REFCOUNT_INC(massign); error = git_vector_insert_sorted( assigns, massign, &merge_assignments); if (error < 0 && error != GIT_EEXISTS) { git_attr_assignment__free(assign); return error; } } } } /* insert allocated assign into vector */ error = git_vector_insert_sorted(assigns, assign, &merge_assignments); if (error < 0 && error != GIT_EEXISTS) return error; /* clear assign since it is now "owned" by the vector */ assign = NULL; } if (assign != NULL) git_attr_assignment__free(assign); *base = git__next_line(scan); return (assigns->length == 0) ? GIT_ENOTFOUND : 0; } static void git_attr_rule__clear(git_attr_rule *rule) { unsigned int i; git_attr_assignment *assign; if (!rule) return; if (!(rule->match.flags & GIT_ATTR_FNMATCH_IGNORE)) { git_vector_foreach(&rule->assigns, i, assign) GIT_REFCOUNT_DEC(assign, git_attr_assignment__free); git_vector_free(&rule->assigns); } /* match.pattern is stored in a git_pool, so no need to free */ rule->match.pattern = NULL; rule->match.length = 0; } void git_attr_rule__free(git_attr_rule *rule) { git_attr_rule__clear(rule); git__free(rule); } int git_attr_session__init(git_attr_session *session, git_repository *repo) { assert(repo); session->key = git_atomic_inc(&repo->attr_session_key); return 0; } void git_attr_session__free(git_attr_session *session) { if (!session) return; git_buf_free(&session->sysdir); git_buf_free(&session->tmp); memset(session, 0, sizeof(git_attr_session)); }