Commit 89a34828 by Patrick Steinhardt

diff: implement function to calculate patch ID

The upstream git project provides the ability to calculate a so-called
patch ID. Quoting from git-patch-id(1):

    A "patch ID" is nothing but a sum of SHA-1 of the file diffs
    associated with a patch, with whitespace and line numbers ignored.

Patch IDs can be used to identify two patches which are probably the
same thing, e.g. when a patch has been cherry-picked to another branch.

This commit implements a new function `git_diff_patchid`, which gets a
patch and derives an OID from the diff. Note the different terminology
here: a patch in libgit2 is the set of differences in a single file, and a diff
can contain multiple patches for different files. The implementation
matches the upstream implementation and should derive the same OID for
the same diff. In fact, some code has been directly derived from the
upstream implementation.

The upstream implementation has two different modes to calculate patch
IDs: the stable and the unstable mode. The old way of calculating
the patch IDs was unstable in the sense that a different ordering of the
diffs led to different results. This oversight was fixed in git
1.9, but as git tries hard to never break existing workflows, the old
and unstable way is still default. The newer and stable way does not
care for ordering of the diff hunks, and in fact it is the mode that
should probably be used today. So right now, we only implement the
stable way of generating the patch ID.
parent fa948752
...@@ -1400,6 +1400,51 @@ GIT_EXTERN(int) git_diff_format_email_init_options( ...@@ -1400,6 +1400,51 @@ GIT_EXTERN(int) git_diff_format_email_init_options(
git_diff_format_email_options *opts, git_diff_format_email_options *opts,
unsigned int version); unsigned int version);
/**
 * Patch ID options structure
 *
 * Initialize with `GIT_DIFF_PATCHID_OPTIONS_INIT` macro to
 * correctly set the default values and version.
 */
typedef struct git_diff_patchid_options {
/** Version of this structure; `GIT_DIFF_PATCHID_OPTIONS_INIT` sets it to `GIT_DIFF_PATCHID_OPTIONS_VERSION`. */
unsigned int version;
} git_diff_patchid_options;
/** Current version number of `git_diff_patchid_options`. */
#define GIT_DIFF_PATCHID_OPTIONS_VERSION 1
/** Static initializer that sets only the `version` member, to the current version. */
#define GIT_DIFF_PATCHID_OPTIONS_INIT { GIT_DIFF_PATCHID_OPTIONS_VERSION }
/**
 * Initialize `git_diff_patchid_options` structure.
 *
 * Initializes the structure with default values. Equivalent to
 * creating an instance with `GIT_DIFF_PATCHID_OPTIONS_INIT`.
 *
 * @param opts The `git_diff_patchid_options` structure to initialize
 * @param version Version of the structure the caller was built
 *	against; normally `GIT_DIFF_PATCHID_OPTIONS_VERSION`
 * @return 0 on success; presumably an error code when `version`
 *	is not supported (TODO confirm against the implementation)
 */
GIT_EXTERN(int) git_diff_patchid_init_options(
git_diff_patchid_options *opts,
unsigned int version);
/**
 * Calculate the patch ID for the given diff.
 *
 * Calculate a stable patch ID for the given diff by summing the
 * hash of the file diffs, ignoring whitespace and line numbers.
 * This can be used to derive whether two diffs are the same with
 * a high probability.
 *
 * Currently, this function only calculates stable patch IDs, as
 * defined in git-patch-id(1), and should in fact generate the
 * same IDs as the upstream git project does.
 *
 * @param out Pointer where the calculated patch ID should be
 *	stored
 * @param diff The diff to calculate the ID for
 * @param opts Options for how to calculate the patch ID. This is
 *	intended for future changes, as currently no options are
 *	available.
 * @return 0 on success, an error code otherwise.
 */
GIT_EXTERN(int) git_diff_patchid(git_oid *out, git_diff *diff, git_diff_patchid_options *opts);
GIT_END_DECL GIT_END_DECL
/** @} */ /** @} */
......
...@@ -19,6 +19,12 @@ ...@@ -19,6 +19,12 @@
#define DIFF_FLAG_SET(DIFF,FLAG,VAL) (DIFF)->opts.flags = \ #define DIFF_FLAG_SET(DIFF,FLAG,VAL) (DIFF)->opts.flags = \
(VAL) ? ((DIFF)->opts.flags | (FLAG)) : ((DIFF)->opts.flags & ~(VAL)) (VAL) ? ((DIFF)->opts.flags | (FLAG)) : ((DIFF)->opts.flags & ~(VAL))
/* State threaded through the diff callbacks while computing a patch ID. */
struct patch_id_args {
/* Running hash of the data fed in since the last flush. */
git_hash_ctx ctx;
/* Byte-wise sum of all digests flushed so far. */
git_oid result;
/* Non-zero until the first file callback has run; that callback skips the flush. */
int first_file;
};
GIT_INLINE(const char *) diff_delta__path(const git_diff_delta *delta) GIT_INLINE(const char *) diff_delta__path(const git_diff_delta *delta)
{ {
const char *str = delta->old_file.path; const char *str = delta->old_file.path;
...@@ -374,3 +380,141 @@ int git_diff_format_email_init_options( ...@@ -374,3 +380,141 @@ int git_diff_format_email_init_options(
return 0; return 0;
} }
/*
 * Finalize the hash currently held in `ctx`, re-initialize `ctx` so it
 * can take new input, and add the finished digest onto `result`.
 *
 * The addition walks both OIDs byte by byte starting at index 0 and
 * propagates the overflow of each byte into the next one; whatever
 * overflows out of the last byte is discarded.
 *
 * Returns 0 on success, or the negative value reported by
 * git_hash_final() or git_hash_init().
 */
static int flush_hunk(git_oid *result, git_hash_ctx *ctx)
{
	git_oid digest;
	unsigned short acc = 0;
	int error, pos;

	error = git_hash_final(&digest, ctx);
	if (error < 0)
		return error;

	error = git_hash_init(ctx);
	if (error < 0)
		return error;

	for (pos = 0; pos < GIT_OID_RAWSZ; pos++) {
		/* Low 8 bits go into the result, the rest carries over. */
		acc += result->id[pos] + digest.id[pos];
		result->id[pos] = acc;
		acc >>= 8;
	}

	return 0;
}
/*
 * Remove every whitespace character (as judged by git__isspace()) from
 * `buf` in place and shrink the buffer to the number of bytes kept.
 *
 * Scanning stops at the first NUL byte found in the buffer's data.
 */
static void strip_spaces(git_buf *buf)
{
	const char *in;
	char *out = buf->ptr;
	size_t kept = 0;

	for (in = buf->ptr; *in != '\0'; in++) {
		if (git__isspace(*in))
			continue;
		/* The write position never overtakes the read position. */
		out[kept++] = *in;
	}

	git_buf_truncate(buf, kept);
}
static int file_cb(
const git_diff_delta *delta,
float progress,
void *payload)
{
struct patch_id_args *args = (struct patch_id_args *) payload;
git_buf buf = GIT_BUF_INIT;
int error;
GIT_UNUSED(progress);
if (!args->first_file &&
(error = flush_hunk(&args->result, &args->ctx)) < 0)
goto out;
args->first_file = 0;
if ((error = git_buf_printf(&buf,
"diff--gita/%sb/%s---a/%s+++b/%s",
delta->old_file.path,
delta->new_file.path,
delta->old_file.path,
delta->new_file.path)) < 0)
goto out;
strip_spaces(&buf);
if ((error = git_hash_update(&args->ctx, buf.ptr, buf.size)) < 0)
goto out;
out:
git_buf_free(&buf);
return error;
}
static int line_cb(
const git_diff_delta *delta,
const git_diff_hunk *hunk,
const git_diff_line *line,
void *payload)
{
struct patch_id_args *args = (struct patch_id_args *) payload;
git_buf buf = GIT_BUF_INIT;
int error;
GIT_UNUSED(delta);
GIT_UNUSED(hunk);
switch (line->origin) {
case GIT_DIFF_LINE_ADDITION:
git_buf_putc(&buf, '+');
break;
case GIT_DIFF_LINE_DELETION:
git_buf_putc(&buf, '-');
break;
case GIT_DIFF_LINE_CONTEXT:
break;
default:
giterr_set(GITERR_PATCH, "invalid line origin for patch");
return -1;
}
git_buf_put(&buf, line->content, line->content_len);
strip_spaces(&buf);
if ((error = git_hash_update(&args->ctx, buf.ptr, buf.size)) < 0)
goto out;
out:
git_buf_free(&buf);
return error;
}
/*
 * Initialize `opts` from the `GIT_DIFF_PATCHID_OPTIONS_INIT` template.
 *
 * All the work is delegated to GIT_INIT_STRUCTURE_FROM_TEMPLATE, which
 * is defined outside this file section; presumably it validates
 * `version` and may return early on mismatch (TODO confirm).
 *
 * Returns 0 when the macro falls through.
 */
int git_diff_patchid_init_options(git_diff_patchid_options *opts, unsigned int version)
{
GIT_INIT_STRUCTURE_FROM_TEMPLATE(
opts, version, git_diff_patchid_options, GIT_DIFF_PATCHID_OPTIONS_INIT);
return 0;
}
/*
 * Compute the patch ID of `diff` and store it in `out`.
 *
 * The diff is walked with git_diff_foreach(): file_cb() hashes a
 * whitespace-free header per file and folds the previous file's digest
 * into the running sum, line_cb() hashes every added, deleted and
 * context line. A final flush_hunk() folds in the digest of the last
 * file. `out` is written only when every step succeeded.
 *
 * `opts` may be NULL (the unit tests call it that way); otherwise its
 * version is checked by GITERR_CHECK_VERSION before any work is done.
 *
 * The hash context is released on every path once it has been
 * initialized successfully.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
int git_diff_patchid(git_oid *out, git_diff *diff, git_diff_patchid_options *opts)
{
	struct patch_id_args args;
	int error;

	GITERR_CHECK_VERSION(
		opts, GIT_DIFF_PATCHID_OPTIONS_VERSION, "git_diff_patchid_options");

	memset(&args, 0, sizeof(args));
	args.first_file = 1;

	/* Nothing to release yet if the context could not be set up. */
	if ((error = git_hash_ctx_init(&args.ctx)) < 0)
		return error;

	if ((error = git_diff_foreach(diff, file_cb, NULL, NULL, line_cb, &args)) < 0)
		goto out;

	/* Fold the digest of the last file into the result. */
	if ((error = flush_hunk(&args.result, &args.ctx)) < 0)
		goto out;

	git_oid_cpy(out, &args.result);

out:
	git_hash_ctx_cleanup(&args.ctx);
	return error;
}
...@@ -176,4 +176,8 @@ void test_core_structinit__compare(void) ...@@ -176,4 +176,8 @@ void test_core_structinit__compare(void)
CHECK_MACRO_FUNC_INIT_EQUAL( \ CHECK_MACRO_FUNC_INIT_EQUAL( \
git_proxy_options, GIT_PROXY_OPTIONS_VERSION, \ git_proxy_options, GIT_PROXY_OPTIONS_VERSION, \
GIT_PROXY_OPTIONS_INIT, git_proxy_init_options); GIT_PROXY_OPTIONS_INIT, git_proxy_init_options);
CHECK_MACRO_FUNC_INIT_EQUAL( \
git_diff_patchid_options, GIT_DIFF_PATCHID_OPTIONS_VERSION, \
GIT_DIFF_PATCHID_OPTIONS_INIT, git_diff_patchid_init_options);
} }
#include "clar_libgit2.h"
#include "patch/patch_common.h"
/*
 * Parse `diff_content` as a diff, compute its patch ID with default
 * (NULL) options and assert that it equals the hex OID `expected_id`.
 */
static void verify_patch_id(const char *diff_content, const char *expected_id)
{
git_oid expected_oid, actual_oid;
git_diff *diff;
cl_git_pass(git_oid_fromstr(&expected_oid, expected_id));
cl_git_pass(git_diff_from_buffer(&diff, diff_content, strlen(diff_content)));
cl_git_pass(git_diff_patchid(&actual_oid, diff, NULL));
cl_assert_equal_oid(&expected_oid, &actual_oid);
git_diff_free(diff);
}
/* A diff preceded by a commit header yields the expected patch ID. */
void test_diff_patchid__simple_commit(void)
{
verify_patch_id(PATCH_SIMPLE_COMMIT, "06094b1948b878b7d9ff7560b4eae672a014b0ec");
}
/*
 * Patch ID for the PATCH_APPEND_NO_NL fixture.
 * NOTE(review): the fixture is defined outside this view; confirm that it
 * really involves a filename containing spaces, as the test name says.
 */
void test_diff_patchid__filename_with_spaces(void)
{
verify_patch_id(PATCH_APPEND_NO_NL, "f0ba05413beaef743b630e796153839462ee477a");
}
/* A single-file diff with two hunks yields the expected patch ID. */
void test_diff_patchid__multiple_hunks(void)
{
verify_patch_id(PATCH_MULTIPLE_HUNKS, "81e26c34643d17f521e57c483a6a637e18ba1f57");
}
/* A diff touching two files yields the expected patch ID. */
void test_diff_patchid__multiple_files(void)
{
verify_patch_id(PATCH_MULTIPLE_FILES, "192d1f49d23f2004517963aecd3f8a6c467f50ff");
}
/*
 * Two diffs that differ only in the whitespace of an added line must
 * produce the same patch ID.
 *
 * The indentation of the added line is spelled with explicit `\t`
 * escapes in one fixture and with plain spaces in the other, so the two
 * inputs are guaranteed to differ even if an editor or tool normalizes
 * literal whitespace in this file.
 */
void test_diff_patchid__same_diff_with_differing_whitespace_has_same_id(void)
{
	const char *tabs =
		"diff --git a/file.txt b/file.txt\n"
		"index 8fecc09..1d43a92 100644\n"
		"--- a/file.txt\n"
		"+++ b/file.txt\n"
		"@@ -1 +1 @@\n"
		"-old text\n"
		"+\t\tnew text\n";
	const char *spaces =
		"diff --git a/file.txt b/file.txt\n"
		"index 8fecc09..1d43a92 100644\n"
		"--- a/file.txt\n"
		"+++ b/file.txt\n"
		"@@ -1 +1 @@\n"
		"-old text\n"
		"+        new text\n";
	const char *id = "11efdd13c30f7a1056eac2ae2fb952da475e2c23";

	verify_patch_id(tabs, id);
	verify_patch_id(spaces, id);
}
...@@ -253,7 +253,66 @@ ...@@ -253,7 +253,66 @@
"@@ -9,0 +10 @@ below it!\n" \ "@@ -9,0 +10 @@ below it!\n" \
"+insert at end\n" "+insert at end\n"
/*
 * PATCH_SIMPLE_COMMIT: a commit header (commit, author, date, subject)
 * followed by a one-file, one-hunk diff of CHANGELOG.md that adds three
 * lines.
 */
/* An insertion at the beginning and end of file (and the resultant patch) */ #define PATCH_SIMPLE_COMMIT \
"commit 15e119375018fba121cf58e02a9f17fe22df0df8\n" \
"Author: Edward Thomson <ethomson@edwardthomson.com>\n" \
"Date: Wed Jun 14 13:31:20 2017 +0200\n" \
"\n" \
" CHANGELOG: document git_filter_init and GIT_FILTER_INIT\n" \
"\n" \
"diff --git a/CHANGELOG.md b/CHANGELOG.md\n" \
"index 1b9e0c90a..24ecba426 100644\n" \
"--- a/CHANGELOG.md\n" \
"+++ b/CHANGELOG.md\n" \
"@@ -96,6 +96,9 @@ v0.26\n" \
" * `git_transport_smart_proxy_options()' enables you to get the proxy options for\n" \
" smart transports.\n" \
"\n" \
"+* The `GIT_FILTER_INIT` macro and the `git_filter_init` function are provided\n" \
"+ to initialize a `git_filter` structure.\n" \
"+\n" \
" ### Breaking API changes\n" \
"\n" \
" * `clone_checkout_strategy` has been removed from\n"
/* A diff of the single file `x` with two hunks: one deletion, one addition. */
#define PATCH_MULTIPLE_HUNKS \
"diff --git a/x b/x\n" \
"index 0719398..fa0350c 100644\n" \
"--- a/x\n" \
"+++ b/x\n" \
"@@ -1,5 +1,4 @@\n" \
" 1\n" \
"-2\n" \
" 3\n" \
" 4\n" \
" 5\n" \
"@@ -7,3 +6,4 @@\n" \
" 7\n" \
" 8\n" \
" 9\n" \
"+10\n"
/* A diff covering two files: a deletion in `x` and an addition in `y`. */
#define PATCH_MULTIPLE_FILES \
"diff --git a/x b/x\n" \
"index 8a1218a..7059ba5 100644\n" \
"--- a/x\n" \
"+++ b/x\n" \
"@@ -1,5 +1,4 @@\n" \
" 1\n" \
" 2\n" \
"-3\n" \
" 4\n" \
" 5\n" \
"diff --git a/y b/y\n" \
"index e006065..9405325 100644\n" \
"--- a/y\n" \
"+++ b/y\n" \
"@@ -1,4 +1,5 @@\n" \
" a\n" \
" b\n" \
"+c\n" \
" d\n" \
" e\n"
#define FILE_PREPEND_AND_APPEND \ #define FILE_PREPEND_AND_APPEND \
"first and\n" \ "first and\n" \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment