#!/usr/bin/env python
# Originally modified from A. Israel's script seen at
# https://gist.github.com/aisrael/b2b78d9dfdd176a232b9
"""To run this script first install the dependencies


  virtualenv v
  source v/bin/activate
  pip install githubpy GitPython requests

Generate a GitHub access token; this is needed because anonymous access
to GitHub's API will easily hit the rate limit even with a single invocation.
For details see:
https://help.github.com/articles/creating-an-access-token-for-command-line-use/

Next, either set the GitHub token in the `GITHUB_ACCESS_TOKEN`
environment variable or, alternatively, invoke the script with the
`--token` switch.

Example:

  ceph-release-notes -r tags/v0.87..origin/giant $(git rev-parse --show-toplevel)

"""

from __future__ import print_function
import argparse
import github
import os
import re
import sys
import requests

from git import Repo


# Patterns are raw strings so that "\d" reaches the regex engine untouched
# instead of relying on Python passing unknown string escapes through.

# Matches a "Merge pull request #N ..." commit summary; captures N.
merge_re = re.compile(r"Merge pull request #(\d+).*")
# Matches "Fixes: #N" or "Fixes #N"; captures N.
fixes_re = re.compile(r"Fixes\:? #(\d+)")
# Matches a tracker issue URL; captures the issue id.  The dots of the host
# name are escaped so that they only match a literal ".".
tracker_re = re.compile(r"http://tracker\.ceph\.com/issues/(\d+)")
# Matches a "Signed-off-by: NAME <" trailer; captures NAME.
signed_off_re = re.compile(r"Signed-off-by: (.+) <")
# URL template of the JSON description of a tracker issue; format with the id.
tracker_uri = "http://tracker.ceph.com/issues/{0}.json"

def get_original_issue(issue, verbose):
    """Resolve a Backport-tracker issue to the issue it was copied from.

    Fetches the JSON description of *issue* (relations included) from the
    tracker and returns:

    * the id (as a string) of the single ``copied_to`` relation when
      *issue* belongs to the "Backport" tracker and has exactly one such
      relation;
    * *issue* itself in every other case.

    When *verbose* is true the reason for the decision is printed.
    """
    def report(prefix, suffix):
        # The message is only assembled when it is going to be printed.
        if verbose:
            print (prefix + "http://tracker.ceph.com/issues/" + issue + suffix)

    response = requests.get(tracker_uri.format(issue),
                            params={"include": "relations"})
    details = response.json()["issue"]

    # looking up the original issue only makes sense when dealing with
    # an issue that lives in the Backport tracker
    if details["tracker"]["name"] != "Backport":
        report("", " is from the tracker " + details["tracker"]["name"] + ", do not look for the original issue")
        return issue

    # a Backport issue without any relation is kept as is
    if "relations" not in details:
        report("", " has no relations, do not look for the original issue")
        return issue

    originals = []
    for relation in details["relations"]:
        if relation["relation_type"] == "copied_to":
            originals.append(str(relation['issue_id']))

    if not originals:
        report("", " has no copied_to relations, do not look for the original issue")
        return issue

    if len(originals) > 1:
        # ambiguous: more than one candidate, keep the backport issue
        report("ERROR: ", " has more than one Copied To relation")
        return issue

    report("", " is the backport of http://tracker.ceph.com/issues/" + originals[0])
    return originals[0]

def split_component(title,gh,number):
    """Return *title* prefixed with the component it belongs to.

    If *title* already starts with a known ``component:`` prefix it is
    returned unchanged and *gh* is not consulted.  Otherwise the labels of
    issue *number* are fetched through
    ``gh.repos("ceph")("ceph").issues(number).get()`` and used to build the
    prefix:

    * ``doc: `` when the ``documentation`` label is present;
    * the known component labels, sorted and comma separated, when at least
      one of them is present;
    * ``UNKNOWN: `` otherwise.
    """
    title_re = r'(bluestore|build/ops|cephfs|cmake|core|common|crush|cli|doc|fs|librados|mds|mon|msgr|mgr|osd|log|librbd|rbd|objecter|pybind|rgw|tests|tools)(:.*)'
    match = re.match(title_re, title)
    if match:
        return match.group(1) + match.group(2)

    issue = gh.repos("ceph")("ceph").issues(number).get()
    labels = {it['name'] for it in issue['labels']}
    if 'documentation' in labels:
        return 'doc: ' + title

    known = {'bluestore', 'build/ops', 'cephfs', 'common', 'core', 'fs',
             'mgr', 'pybind', 'rgw', 'rbd'}
    # Sets have no defined iteration order; sort so that the same labels
    # always produce the same prefix from one run to the next.
    components = sorted(known.intersection(labels))
    if components:
        return ",".join(components) + ': ' + title
    return 'UNKNOWN: ' + title


def _native_str(text):
    """Return *text* as the interpreter's native ``str`` type.

    On Python 2 a ``unicode`` value is encoded to UTF-8 so it can be mixed
    with byte-string literals; on Python 3 ``str`` is returned untouched and
    ``bytes`` are decoded from UTF-8.
    """
    if isinstance(text, str):
        return text
    if isinstance(text, bytes):
        return text.decode("utf-8")
    return text.encode("utf-8")


def _title_and_message(pr_title, commit_message):
    """Derive the release-note title and extra message of a merge commit.

    The first line of *commit_message* is ignored, as are blank lines and
    lines containing ``Reviewed-by``.  A leading line equal to *pr_title*
    is dropped as well.  Of what remains:

    * nothing       -> ``(pr_title, None)``
    * a single line -> ``(that line, None)``: it is assumed to be a
      re-written PR title
    * several lines -> ``(pr_title, lines indented by four spaces)``
    """
    lines = []
    for line in commit_message.split('\n')[1:]:
        if 'Reviewed-by' in line:
            continue
        line = line.strip()
        if line:
            lines.append(line)

    # ``lines`` may be empty (e.g. a merge commit whose body only holds
    # Reviewed-by trailers), so it must be checked before indexing.
    if lines and lines[0] == pr_title.strip():
        lines.pop(0)

    if not lines:
        return pr_title, None
    if len(lines) == 1:
        return lines[0], None
    return pr_title, "    " + "\n    ".join(lines)


def make_release_notes(gh, repo, ref, plaintext, verbose, strict, use_tags):
    """Print one release-note entry per merged pull request found in *ref*.

    Arguments:
      gh        -- GitHub API client; pull requests are read with
                   ``gh.repos("ceph")("ceph").pulls(number).get()``
      repo      -- repository object providing ``iter_commits``
      ref       -- revision (range) whose merge commits are examined
      plaintext -- when true print ``#issue`` references and no links,
                   otherwise print issue and pull request links
      verbose   -- forwarded to ``get_original_issue``
      strict    -- when true: ignore the merge commit body, skip (and
                   report) pull requests without an associated issue,
                   report titles lacking a ``release: component:`` prefix
                   and report issues referenced by several pull requests
      use_tags  -- when true pass each title through ``split_component``

    A ``.`` is written to stdout for every pull request collected.  Entries
    without an extra message are printed first, ordered by title, followed
    by the entries that have one, ordered by message then title.
    """
    issue2prs = {}
    pr2issues = {}
    pr2info = {}

    for commit in repo.iter_commits(ref, merges=True):
        merge = merge_re.match(commit.summary)
        if not merge:
            continue
        number = merge.group(1)
        pr_url = "http://github.com/ceph/ceph/pull/" + str(number)
        pr = gh.repos("ceph")("ceph").pulls(number).get()

        if strict:
            title, message = pr['title'], None
        else:
            title, message = _title_and_message(pr['title'], commit.message)

        issues = []
        if pr['body']:
            issues = fixes_re.findall(pr['body']) + tracker_re.findall(pr['body'])

        # dict used as a set of names: repeated sign-offs are collapsed
        authors = {}
        for c in repo.iter_commits("{sha1}^1..{sha1}^2".format(sha1=commit.hexsha)):
            for author in re.findall(r"Signed-off-by:\s*(.*?)\s*<", c.message):
                authors[author] = 1
            issues.extend(fixes_re.findall(c.message) + tracker_re.findall(c.message))
        if authors:
            author = ", ".join(authors.keys())
        else:
            author = commit.parents[-1].author.name

        if strict and not issues:
            print ("ERROR: " + pr_url + " has no associated issue")
            continue

        if strict:
            title_re = r'^(?:hammer|infernalis|jewel|kraken):\s+(cli|common|mon|msgr|osd|log|librbd|rbd|fs|mds|objecter|rgw|build/ops|tests|tools|cmake|doc|crush|librados)(:.*)'
            match = re.match(title_re, title)
            if not match:
                print ("ERROR: " + pr_url + " title " + _native_str(title) + " does not match " + title_re)
            else:
                title = match.group(1) + match.group(2)
        if use_tags:
            title = split_component(title, gh, number)

        pr2info[number] = (author, title, message)

        for issue in set(issues):
            issue = get_original_issue(issue, verbose)
            issue2prs.setdefault(issue, set()).add(number)
            pr2issues.setdefault(number, set()).add(issue)
        sys.stdout.write('.')

    print (" done collecting merges.")

    if strict:
        for issue, prs in issue2prs.items():
            if len(prs) > 1:
                print (">>>>>>> " + str(len(prs)) + " pr for issue " + issue + " " + str(prs))

    def sort_key(item):
        # Order by (message, title) with a missing message sorting first;
        # None is never compared with a string, which Python 3 rejects.
        _, (_, title, message) = item
        return (message is not None, message or "", title)

    for pr, (author, title, message) in sorted(pr2info.items(), key=sort_key):
        if pr in pr2issues:
            # numeric order keeps the output stable from one run to the next
            numbers = sorted(pr2issues[pr], key=int)
            if plaintext:
                refs = ['#' + str(issue) for issue in numbers]
            else:
                refs = ['`issue#{issue} <http://tracker.ceph.com/issues/{issue}>`_'.format(issue=issue)
                        for issue in numbers]
            issues = ", ".join(refs) + ", "
        else:
            issues = ''
        if plaintext:
            print ("* {title} ({issues}{author})".format(
                title=_native_str(title), issues=issues,
                author=_native_str(author)))
        else:
            print ("* {title} ({issues}`pr#{pr} <http://github.com/ceph/ceph/pull/{pr}>`_, {author})".format(
                title=_native_str(title), issues=issues,
                author=_native_str(author), pr=pr))
        if message:
            print (message)


if __name__ == "__main__":
    # Command line entry point: parse the options, build the GitHub client
    # and the git repository object, then print the release notes.
    desc = '''
    Make ceph release notes for a given revision. Eg usage:

    $ ceph-release-notes -r tags/v0.87..origin/giant $(git rev-parse --show-toplevel)

    It is recommended to set the github env. token in order to avoid
    hitting the api rate limits.
    '''

    def _flag_value(text):
        """Interpret the optional value given to a boolean-style switch."""
        return text.strip().lower() not in ("", "0", "false", "no", "off")

    parser = argparse.ArgumentParser(description=desc,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("--rev", "-r",
                        help="git revision range for creating release notes")
    parser.add_argument("--text", "-t",
                        action='store_true', default=None,
                        help="output plain text only, no links")
    parser.add_argument("--verbose", "-v",
                        action='store_true', default=None,
                        help="verbose")
    parser.add_argument("--strict",
                        action='store_true', default=None,
                        help="strict, recommended only for backport releases")
    parser.add_argument("repo", metavar="repo",
                        help="path to ceph git repo")
    parser.add_argument("--token", default=os.getenv("GITHUB_ACCESS_TOKEN"),
                        help="Github Access Token ($GITHUB_ACCESS_TOKEN otherwise)")
    # Usable as a bare switch ("--use-tags" last or followed by another
    # option).  An explicit value is still accepted, as it used to be
    # required, but "false", "no", "off" and "0" now really disable it
    # instead of counting as true like any other non-empty string.
    parser.add_argument("--use-tags", nargs='?', const=True, default=False,
                        type=_flag_value, metavar="BOOL",
                        help="Use github tags to guess the component")

    args = parser.parse_args()
    gh = github.GitHub(
        access_token=args.token)

    make_release_notes(gh, Repo(args.repo), args.rev, args.text, args.verbose, args.strict, args.use_tags)
