#!/bin/sh
exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
#!perl -w
##############################################################
### ###
### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
### ###
##############################################################
## $Revision: 1.1 $
## $Date: 2002/02/06 04:36:17 $
## $Author: seank $
##
## (C) 1999 Karl Fogel <kfogel@red-bean.com>, under the GNU GPL.
##
## (Extensively hacked on by Melissa O'Neill <oneill@cs.sfu.ca>.)
##
## cvs2cl.pl is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2, or (at your option)
## any later version.
##
## cvs2cl.pl is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You may have received a copy of the GNU General Public License
## along with cvs2cl.pl; see the file COPYING. If not, write to the
## Free Software Foundation, Inc., 59 Temple Place - Suite 330,
## Boston, MA 02111-1307, USA.
use strict;
use Text::Wrap;
use Time::Local;
use File::Basename;
# The Plan:
#
# Read in the logs for multiple files, spit out a nice ChangeLog that
# mirrors the information entered during `cvs commit'.
#
# The problem presents some challenges. In an ideal world, we could
# detect files with the same author, log message, and checkin time --
# each <filelist, author, time, logmessage> would be a changelog entry.
# We'd sort them; and spit them out. Unfortunately, CVS is *not atomic*
# so checkins can span a range of times. Also, the directory structure
# could be hierarchical.
#
# Another question is whether we really want to have the ChangeLog
# exactly reflect commits. An author could issue two related commits,
# with different log entries, reflecting a single logical change to the
# source. GNU style ChangeLogs group these under a single author/date.
# We try to do the same.
#
# So, we parse the output of `cvs log', storing log messages in a
# multilevel hash that stores the mapping:
# directory => author => time => message => filelist
# As we go, we notice "nearby" commit times and store them together
# (i.e., under the same timestamp), so they appear in the same log
# entry.
#
# When we've read all the logs, we twist this mapping into
# a time => author => message => filelist mapping for each directory.
#
# If we're not using the `--distributed' flag, the directory is always
# considered to be `./', even as descend into subdirectories.
############### Globals ################
# What we run to generate it:
my $Log_Source_Command = "cvs log";
# In case we have to print it out:
my $VERSION = '$Revision: 1.1 $';
$VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
## Vars set by options:
# Print debugging messages?
my $Debug = 0;
# Just show version and exit?
my $Print_Version = 0;
# Just print usage message and exit?
my $Print_Usage = 0;
# Single top-level ChangeLog, or one per subdirectory?
my $Distributed = 0;
# What file should we generate (defaults to "ChangeLog")?
my $Log_File_Name = "ChangeLog";
# Grab most recent entry date from existing ChangeLog file, just add
# to that ChangeLog.
my $Cumulative = 0;
# Expand usernames to email addresses based on a map file?
my $User_Map_File = "";
# Output to a file or to stdout?
my $Output_To_Stdout = 0;
# Eliminate empty log messages?
my $Prune_Empty_Msgs = 0;
# Don't call Text::Wrap on the body of the message
my $No_Wrap = 0;
# Separates header from log message. Code assumes it is either " " or
# "\n\n", so if there's ever an option to set it to something else,
# make sure to go through all conditionals that use this var.
my $After_Header = " ";
# Format more for programs than for humans.
my $XML_Output = 0;
# Do some special tweaks for log data that was written in FSF
# ChangeLog style.
my $FSF_Style = 0;
# Show times in UTC instead of local time
my $UTC_Times = 0;
# Show day of week in output?
my $Show_Day_Of_Week = 0;
# Show revision numbers in output?
my $Show_Revisions = 0;
# Show tags (symbolic names) in output?
my $Show_Tags = 0;
# Show branches by symbolic name in output?
my $Show_Branches = 0;
# Show only revisions on these branches or their ancestors.
my @Follow_Branches;
# Don't bother with files matching this regexp.
my @Ignore_Files;
# How exactly we match entries. We definitely want "o",
# and user might add "i" by using --case-insensitive option.
my $Case_Insensitive = 0;
# Maybe only show log messages matching a certain regular expression.
my $Regexp_Gate = "";
# Pass this global option string along to cvs, to the left of `log':
my $Global_Opts = "";
# Pass this option string along to the cvs log subcommand:
my $Command_Opts = "";
# Read log output from stdin instead of invoking cvs log?
my $Input_From_Stdin = 0;
# Don't show filenames in output.
my $Hide_Filenames = 0;
# Max checkin duration. CVS checkin is not atomic, so we may have checkin
# times that span a range of time. We assume that checkins will last no
# longer than $Max_Checkin_Duration seconds, and that similarly, no
# checkins will happen from the same users with the same message less
# than $Max_Checkin_Duration seconds apart.
my $Max_Checkin_Duration = 180;
# What to put at the front of [each] ChangeLog.
my $ChangeLog_Header = "";
## end vars set by options.
# In 'cvs log' output, one long unbroken line of equal signs separates
# files:
my $file_separator = "======================================="
. "======================================";
# In 'cvs log' output, a shorter line of dashes separates log messages
# within a file:
my $logmsg_separator = "----------------------------";
############### End globals ############
&parse_options ();
&derive_change_log ();
### Everything below is subroutine definitions. ###
# If accumulating, grab the boundary date from pre-existing ChangeLog.
sub maybe_grab_accumulation_date ()
{
if (! $Cumulative) {
return "";
}
# else
open (LOG, "$Log_File_Name")
or die ("trouble opening $Log_File_Name for reading ($!)");
my $boundary_date;
while (<LOG>)
{
if (/^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/)
{
$boundary_date = "$1";
last;
}
}
close (LOG);
return $boundary_date;
}
# Fills up a ChangeLog structure in the current directory.
sub derive_change_log ()
{
# See "The Plan" above for a full explanation.
my %grand_poobah;
my $file_full_path;
my $time;
my $revision;
my $author;
my $msg_txt;
my $detected_file_separator;
# Might be adding to an existing ChangeLog
my $accumulation_date = &maybe_grab_accumulation_date ();
if ($accumulation_date) {
$Log_Source_Command .= " -d\'>${accumulation_date}\'";
}
# We might be expanding usernames
my %usermap;
# In general, it's probably not very maintainable to use state
# variables like this to tell the loop what it's doing at any given
# moment, but this is only the first one, and if we never have more
# than a few of these, it's okay.
my $collecting_symbolic_names = 0;
my %symbolic_names; # Where tag names get stored.
my %branch_names; # We'll grab branch names while we're at it.
my %branch_numbers; # Save some revisions for @Follow_Branches
my @branch_roots; # For showing which files are branch ancestors.
# Bleargh. Compensate for a deficiency of custom wrapping.
if (($After_Header ne " ") and $FSF_Style)
{
$After_Header .= "\t";
}
if (! $Input_From_Stdin) {
open (LOG_SOURCE, "$Log_Source_Command |")
or die "unable to run \"${Log_Source_Command}\"";
}
else {
open (LOG_SOURCE, "-") or die "unable to open stdin for reading";
}
%usermap = &maybe_read_user_map_file ();
while (<LOG_SOURCE>)
{
# If on a new file and don't see filename, skip until we find it, and
# when we find it, grab it.
if ((! (defined $file_full_path)) and /^Working file: (.*)/)
{
$file_full_path = $1;
if (@Ignore_Files)
{
my $base;
($base, undef, undef) = fileparse ($file_full_path);
# Ouch, I wish trailing operators in regexps could be
# evaluated on the fly!
if ($Case_Insensitive) {
if (grep ($file_full_path =~ m|$_|i, @Ignore_Files)) {
undef $file_full_path;
}
}
elsif (grep ($file_full_path =~ m|$_|, @Ignore_Files)) {
undef $file_full_path;
}
}
next;
}
# Just spin wheels if no file defined yet.
next if (! $file_full_path);
# Collect tag names in case we're asked to print them in the output.
if (/^symbolic names:$/) {
$collecting_symbolic_names = 1;
next; # There's no more info on this line, so skip to next
}
if ($collecting_symbolic_names)
{
# All tag names are listed with whitespace in front in cvs log
# output; so if see non-whitespace, then we're done collecting.
if (/^\S/) {
$collecting_symbolic_names = 0;
}
else # we're looking at a tag name, so parse & store it
{
# According to the Cederqvist manual, in node "Tags", tag
# names must start with an uppercase or lowercase letter and
# can contain uppercase and lowercase letters, digits, `-',
# and `_'. However, it's not our place to enforce that, so
# we'll allow anything CVS hands us to be a tag:
/^\s+([^:]+): ([\d.]+)$/;
my $tag_name = $1;
my $tag_rev = $2;
# A branch number either has an odd number of digit sections
# (and hence an even number of dots), or has ".0." as the
# second-to-last digit section. Test for these conditions.
my $real_branch_rev = "";
if (($tag_rev =~ /^(\d+\.\d+\.)+\d+$/) # Even number of dots...
and (! ($tag_rev =~ /^(1\.)+1$/))) # ...but not "1.[1.]1"
{
$real_branch_rev = $tag_rev;
}
elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) # Has ".0."
{
$real_branch_rev = $1 . $3;
}
# If we got a branch, record its number.
if ($real_branch_rev)
{
$branch_names{$real_branch_rev} = $tag_name;
if (@Follow_Branches) {
if (grep ($_ eq $tag_name, @Follow_Branches)) {
$branch_numbers{$tag_name} = $real_branch_rev;
}
}
}
else {
# Else it's just a regular (non-branch) tag.
push (@{$symbolic_names{$tag_rev}}, $tag_name);
}
}
}
# End of code for collecting tag names.
# If have file name, but not revision, and see revision, then grab
# it. (We collect unconditionally, even though we may or may not
# ever use it.)
if ((! (defined $revision)) and (/^revision (\d+\.[\d.]+)/))
{
$revision = $1;
if (@Follow_Branches)
{
foreach my $branch (@Follow_Branches)
{
# Special case for following trunk revisions
if (($branch =~ /^trunk$/i) and ($revision =~ /^[0-9]+\.[0-9]+$/))
{
goto dengo;
}
my $branch_number = $branch_numbers{$branch};
if ($branch_number)
{
# Are we on one of the follow branches or an ancestor of
# same?
#
# If this revision is a prefix of the branch number, or
# possibly is less in the minormost number, OR if this
# branch number is a prefix of the revision, then yes.
# Otherwise, no.
#
# So below, we determine if any of those conditions are
# met.
# Trivial case: is this revision on the branch?
# (Compare this way to avoid regexps that screw up Emacs
# indentation, argh.)
if ((substr ($revision, 0, ((length ($branch_number)) + 1)))
eq ($branch_number . "."))
{
goto dengo;
}
# Non-trivial case: check if rev is ancestral to branch
elsif ((length ($branch_number)) > (length ($revision)))
{
$revision =~ /^((?:\d+\.)+)(\d+)$/;
my $r_left = $1; # still has the trailing "."
my $r_end = $2;
$branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/;
my $b_left = $1; # still has trailing "."
my $b_mid = $2; # has no trailing "."
if (($r_left eq $b_left)
&& ($r_end <= $b_mid))
{
goto dengo;
}
}
}
}
}
else # (! @Follow_Branches)
{
next;
}
# Else we are following branches, but this revision isn't on the
# path. So skip it.
undef $revision;
dengo:
next;
}
# If we don't have a revision right now, we couldn't possibly
# be looking at anything useful.
if (! (defined ($revision))) {
$detected_file_separator = /^$file_separator$/o;
if ($detected_file_separator) {
# No revisions for this file; can happen, e.g. "cvs log -d DATE"
goto CLEAR;
}
else {
next;
}
}
# If have file name but not date and author, and see date or
# author, then grab them:
unless (defined $time)
{
if (/^date: .*/)
{
($time, $author) = &parse_date_and_author ($_);
if (defined ($usermap{$author}) and $usermap{$author}) {
$author = $usermap{$author};
}
}
else {
$detected_file_separator = /^$file_separator$/o;
if ($detected_file_separator) {
# No revisions for this file; can happen, e.g. "cvs log -d DATE"
goto CLEAR;
}
}
# If the date/time/author hasn't been found yet, we couldn't
# possibly care about anything we see. So skip:
next;
}
# A "branches: ..." line here indicates that one or more branches
# are rooted at this revision. If we're showing branches, then we
# want to show that fact as well, so we collect all the branches
# that this is the latest ancestor of and store them in
# @branch_roots. Just for reference, the format of the line we're
# seeing at this point is:
#
# branches: 1.5.2; 1.5.4; ...;
#
# Okay, here goes:
if (/^branches:\s+(.*);$/)
{
if ($Show_Branche