#! /bin/bash
# -*-coding: iso-8859-1; -*-
#
# (C) Copyright IBM Corp. 2001, 2003
#
# $Id: findDeviantFiles,v 1.11 2004/06/11 17:51:19 augart-oss Exp $
#
# This shell finds plain files that have:
#   no authorship tag: "@ author", or
#   no copyright notice: "(C) Copyright", or
#   no id: "$Id: findDeviantFiles,v 1.11 2004/06/11 17:51:19 augart-oss Exp $".
#
# Parameters (the third one is optional):
#   input directory: root of the subtree to search
#   output directory: where to put the output files
#   suffix: extension appended to the names of the output files
#
# This script could be more efficiently implemented by searching for all 
# the missing information at the same time; however, we would not know 
# what information was missing in a particular file.
#
# @Author: Peter F. Sweeney
# @Date: 2001/10/05
# @Modified Steven Augart
# @Date 2003/06/02, 2003/07
#	Ported to Bash

# Print the usage message on standard error and terminate the script.
#
# Arguments:
#   $1 - exit status to terminate with; defaults to 1 when omitted.
# Globals:
#   ME (read) - name of this script, shown in the synopsis lines.
#
# The output file names listed here must stay in sync with the redirections
# at the bottom of the script (noAuthor, noCopyright, noId).
usage() {
    local exitstatus="${1-1}"
    cat >&2 <<EOF
Usage: $ME <input-subtree> <output-directory> [ <suffix> ]
  or:  $ME --help
   
  Where 
    <input-subtree>	identifies the directory subtree to search for files, 
    <output-directory>	indicates where to put output files, and
    <suffix>		is a file extension appended to the output files.

  This script finds all files in a directory subtree 
  that don't contain either:
    1) an @author javadoc tag, 
    2) a copyright notice, or
    3) an \$Id: tag.

  Output files:

    "<output-directory>/noAuthor[.<suffix>]"
	      Names of files without the @author tag
    "<output-directory>/noCopyright[.<suffix>]"   
	      Names of files without a copyright notice
    "<output-directory>/noId[.<suffix>]"   
	      Names of files without an \$Id: tag.

EOF
    exit "${exitstatus}"
}

# Enable -u: expanding an unset variable is a fatal error from here on.
set -o nounset

# Name of this script: parameter-expansion equivalent of basename(1).
ME="${0##*/}"

# The first argument must exist before it can be inspected under nounset.
if (( $# < 1 )); then
    usage 1
fi
case "$1" in
    -help|--help) usage 0 ;;
esac

# Exactly two or three positional arguments are accepted.
if (( $# < 2 || $# > 3 )); then
    usage 1
fi
in="$1"				# directory subtree to search
out="$2"			# directory that receives the report files
suffix=""			# optional extension for the report files
if (( $# == 3 )); then
    suffix="$3"
    # Make sure the suffix starts with a dot.
    [[ $suffix == .* ]] || suffix=".$suffix"
fi

# Directory this script was invoked from: parameter-expansion equivalent of
# dirname(1).  When $0 contains no slash the expansion leaves $0 untouched,
# i.e. it equals $ME; that case is detected below.
mydir="${0%/*}"

# The right-hand side is quoted so that $ME is compared literally; unquoted,
# [[ ]] would treat it as a glob pattern and mis-compare names that contain
# characters such as '[' or '*'.
if [[ -n $mydir && $mydir != "$ME" ]]; then
    # Auxiliary programs are expected next to this script.
    bin_dir="${mydir}"
else
    # No directory component in $0: fall back to the RVM_ROOT layout.
    bin_dir="${RVM_ROOT:?$ME: You must set the RVM_ROOT variable before you run this program.}/rvm/bin"
fi

# Report a fatal, non-usage error and terminate the script with status 1.
#
# Arguments:
#   $* - the words of the error message; they are joined into one string
#        and prefixed with "<script name>: ".
# Globals:
#   ME (read) - name of this script.
#
# The message goes to standard error.  When /usr/bin/fold accepts the GNU
# long options, the text is wrapped at 65 columns on word boundaries;
# otherwise it is passed through unchanged.  Every line after the first is
# indented by five spaces.  The EXIT trap is neutralized before exiting.
croak_nonusage() {
    local -a wrapper=(/usr/bin/fold --width=65 --spaces)

    # Probe the wrapper once with empty input; fall back to a plain copy
    # if it is missing or rejects the options.
    if ! "${wrapper[@]}" < /dev/null > /dev/null 2>&1; then
        wrapper=(cat)
    fi

    {
        echo "${ME}: $*" \
            | "${wrapper[@]}" \
            | sed -e '2,$s/^/     /'
    } >&2

    trap '' EXIT
    exit 1
}

# Load the shared environment checks from the provisional bin_dir computed
# above.  Fail with a readable message if the file is missing or unreadable.
sanity_env="${bin_dir}/sanity-envars.bash"
[[ -f $sanity_env ]] || croak_nonusage "Internal error: Cannot find the file sanity-envars.bash ($sanity_env); something is badly broken."
[[ -r $sanity_env ]] || croak_nonusage "Internal error: Cannot read the file sanity-envars.bash ($sanity_env); something is badly broken.  You might check the file permissions or user id you used to extract Jikes RVM."
. "${sanity_env}";		# sets ME, defines checkenv()

# From here on the helper programs are always taken from $RVM_ROOT/rvm/bin.
checkenv RVM_ROOT
bin_dir=${RVM_ROOT}/rvm/bin

# Warn (but carry on) when the script was started via an absolute path that
# is not the directory RVM_ROOT points at.
if [[ $mydir == /* && $mydir != "$bin_dir" ]]; then
    echo >&2 "Something is really strange here: This program was invoked out of  the directory $mydir, but your RVM_ROOT is set to $RVM_ROOT. You may have an inconsistent PATH and RVM_ROOT.  Going on anyway."
fi

# Until the trap is cleared on the last line, any exit is reported as an
# unexpected failure.
trap 'echo "$ME: An Unexpected Error happened; execution aborted" >&2' EXIT
set -e # Abort on errors

# The working directory changes below, so relative paths given by the caller
# (or via RVM_ROOT) must be anchored to the directory we were started in.
# Otherwise the report files and helper programs would be looked up relative
# to the input directory.
[[ $out == /* ]] || out="$PWD/$out"
[[ $bin_dir == /* ]] || bin_dir="$PWD/$bin_dir"

## Create output directory if necessary.
mkdir -p -- "$out"
# cd to input directory
cd -- "$in" || croak_nonusage "Cannot change to the input directory \"$in\"."

# Every expansion below is quoted, so pathname expansion is not needed;
# switch it off so that no pattern meant for find(1) can ever be expanded by
# the shell.
set -o noglob

# Expression shared by all three searches.  Symbolic links are not followed
# (that would require "-follow").
#   -name CVS -prune     skip CVS administrative directories
#   ! -name '*~'         skip Emacs backup files (this may be run from a
#   ! -name '.*~'          user's working directory)
#   ! -name '.#*'        skip CVS internal files used to store older revisions
#   -type f -print       report regular files only
# The negation operator must reach find as a bare "!"; a backslash inside
# single quotes is literal and would hand find the invalid operator "\!".
declare -r -a find_optsA=(
    -name CVS -prune
    -o
    '!' -name '*~'
    '!' -name '.*~'
    '!' -name '.#*'
    -type f -print
)

# Each search runs grep once per file ("\;", not "+") so that the stream
# handed to findMatchedFiles.perl keeps the same shape: one count line per
# file, in the order find prints the file names.

# Plain files without an @author tag.
find . "${find_optsA[@]}" -exec grep -F -c "@author" {} \; \
    | "$bin_dir/findMatchedFiles.perl" \
    | awk -f "$bin_dir/noAuthorFilter.awk" \
    > "$out/noAuthor$suffix"

# Plain files without a copyright notice.
find . "${find_optsA[@]}" -exec grep -E -c "\([cC]\) Copyright|Copyright |Copyright \([cC]\)" {} \; \
    | "$bin_dir/findMatchedFiles.perl" \
    | awk -f "$bin_dir/noCopyrightFilter.awk" \
    > "$out/noCopyright${suffix}"

# Plain files without an $Id tag.
find . "${find_optsA[@]}" -exec grep -F -c '$Id' {} \; \
    | "$bin_dir/findMatchedFiles.perl" \
    | awk -f "$bin_dir/noIdFilter.awk" \
    > "$out/noId${suffix}"


# Normal completion: silence the "unexpected error" exit trap.
trap '' EXIT
