update_authors 4.61 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
#!/usr/bin/env bash
#
# Copyright (c) 2021 ETH Zurich, University of Bologna
# SPDX-License-Identifier: Apache-2.0
#
# Authors:
# - Andreas Kurth <akurth@iis.ee.ethz.ch>

set -euo pipefail

# Write a printf-style message to standard error.
# Arguments: $1 - printf format string; remaining arguments - values for the format.
stderr() {
    >&2 printf "$@"
}

# Authors who prefer not to appear with real name and email address in author lists, keyed by
# author name.  The value of an entry is executed as a command to decide whether the author is
# skipped, so it should be `true`.  The list is currently empty.
declare -A hide=()

# Email address of each known author, keyed by the author name as reported by `git log`.
# The script aborts when an author that is to be listed has no entry here.
declare -A emails=()
emails["Andreas Kurth"]="akurth@iis.ee.ethz.ch"
emails["Fabian Schuiki"]="fschuiki@iis.ee.ethz.ch"
emails["Florian Zaruba"]="zarubaf@iis.ee.ethz.ch"
emails["Matheus Cavalcante"]="matheusd@iis.ee.ethz.ch"
emails["Samuel Riedel"]="sriedel@iis.ee.ethz.ch"
emails["Stefan Mach"]="smach@iis.ee.ethz.ch"
emails["Wolfgang Rönninger"]="wroennin@iis.ee.ethz.ch"

# Path of the temporary file that is currently being written (empty if there is none).
tmpfile=''

# Remove a half-written temporary file when the script exits before moving it into place.
cleanup() {
    if [[ -n "$tmpfile" ]]; then
        rm -f -- "$tmpfile"
    fi
}
trap cleanup EXIT

# Succeed if an author contributed at least 5% of the changes in a file.
# Arguments: $1 - number of lines changed by the author
#            $2 - total number of lines changed in the file
# The comparison is cross-multiplied instead of divided, so a history without any line changes
# (total of 0) does not cause a division by zero; every author qualifies in that case.
is_significant_author() {
    local change=$1 total=$2
    (( change * 100 >= total * 5 ))
}

# Rewrite the 'Authors:' paragraph of each source file from the file's `git log` history
# (see `done` line for which files are included).
#
# For each file, the paragraph header is created if it is missing, then everything between the
# header and the next empty line is replaced by one '<comment lead> - <name> <email>' line per
# author, with the author with the most changed lines first.  The file and the selected authors
# (with their change counts) are reported on standard error.
while IFS= read -r -d '' file; do
    if [[ "$file" =~ \.svh?$ ]]; then # SystemVerilog file
        comment_lead='//'
        comment_lead_escaped='\/\/'
    else
        comment_lead='#'
        comment_lead_escaped='#'
    fi
    re_authors="^\\s*${comment_lead}\\s*Authors:\$"

    # If the file does not contain an 'Authors:' paragraph, create one.
    if ! grep -q "$re_authors" "$file"; then
        # Find the first line that starts a paragraph and is not a copyright line.  A paragraph
        # ends at an empty line or at a line that consists only of slashes.
        # NOTE(review): a first line that is not a copyright line (e.g., a shebang) ends the search
        # immediately, so the header is then inserted after line 1 -- confirm this is intended.
        copyright_end=$(awk "BEGIN {newparagraph=1;}
        {
            if (newparagraph) {
                if (\$0 ~ /^${comment_lead_escaped}\\s*[cC]opyright/) {
                    newparagraph=0;
                    next
                } else {
                    print NR;
                    exit
                }
            } else if (\$0 ~ /^\\/*$/) {
                newparagraph=1;
                next
            }
        }" "$file")
        # Without a line number, the `sed` command below would append the header after every line.
        if [[ -z "$copyright_end" ]]; then
            stderr "Error: Cannot find end of copyright header in '%s'!\n" "$file"
            exit 1
        fi
        # Insert 'Authors:' header.
        sed -i -e "${copyright_end}a${comment_lead} Authors:" "$file"
    fi

    # Find line before and after authors list: the list starts after the 'Authors:' header and
    # ends before the next empty line.
    authors_begin=$(grep -n -m 1 "$re_authors" "$file" | cut -d: -f1)
    authors_end=$(awk -v begin="$authors_begin" 'NR > begin && /^$/ {print NR; exit}' "$file")
    if [[ -z "$authors_end" ]]; then
        stderr "Error: No empty line after authors list in '%s'!\n" "$file"
        exit 1
    fi

    # Replace author list with information from `git log`.
    tmpfile=$(mktemp)
    {
        sed -n "1,${authors_begin}p" "$file"
        # Sum added and deleted lines per author.  Lines are classified by their shape instead of
        # their position, so a commit that is printed without a numstat line (e.g., a merge) does
        # not shift the parsing of the following commits.  The first output line is the total,
        # each following line is '<changed lines> <author name>'.
        readarray -t changes < <(git log --follow --numstat --format='%an' -- "$file" | awk '
            /^$/ { next }
            /^[0-9-]+\t[0-9-]+\t/ {
                change[author] += $1 + $2
                totalchange += $1 + $2
                next
            }
            { author = $0 }
            END {
                print totalchange + 0
                for (author in change) {
                    print change[author] " " author
                }
            }')
        totalchange="${changes[0]}"
        authorchanges=()
        for authorchange in "${changes[@]:1}"; do
            change=${authorchange%% *}
            name=${authorchange#* }
            # Only list authors who have contributed at least 5% of the changes in the file.
            if is_significant_author "$change" "$totalchange"; then
                authorchanges+=("$change $name")
            fi
        done
        # Sort author with most changes first.  An empty list is kept empty; piping it through
        # `printf '%s\n'` would produce one empty line, i.e., an author without a name.
        authors=()
        if (( ${#authorchanges[@]} > 0 )); then
            readarray -t authors < <(printf '%s\n' "${authorchanges[@]}" | sort -nr)
        fi
        stderr '%s\n' "$file"
        if (( ${#authors[@]} > 0 )); then
            stderr '%s\n' "${authors[@]}"
        fi
        stderr '\n'
        # The `+` expansion keeps an empty array from being an unbound variable in older Bash.
        for entry in ${authors[@]+"${authors[@]}"}; do
            author=${entry#* } # strip the change count
            # The value stored in `hide` is run as a command; `true` hides the author.
            if "${hide[$author]:-false}"; then
                continue
            fi
            if [[ -z "${emails[$author]+_}" ]]; then
                stderr "Error: Email address of author '%s' unknown!\n" "$author"
                exit 1
            fi
            email="${emails[$author]}"
            author_ascii=${author//ö/oe}
            if grep -q -P '[^[:ascii:]]' <<<"$author_ascii"; then
                stderr "Author name '%s' contains a non-ASCII character!\n" "$author_ascii"
                exit 1
            fi
            printf '%s\n' "$comment_lead - $author_ascii <$email>"
        done
        sed -n "${authors_end},\$p" "$file"
    } > "$tmpfile"
    chmod --reference="$file" "$tmpfile" # retain file permissions
    mv "$tmpfile" "$file"
    tmpfile='' # moved into place, nothing left to clean up

done < <(git ls-tree -r -z --name-only HEAD -- 'include' 'scripts' 'src' 'test' \
    | grep -z -P '\.s(?:vh?|h)$|^(?!.+\.)')
    # left alternative matches files with .sh, .sv, and .svh extensions;
    # right alternative matches paths without a dot after the first character, i.e., files with
    # no extension