#!/usr/bin/env ruby
# encoding: utf-8

 # Name under which this script was started (basename of the program path);
 # used in the option banner and in the pager prompt.
 Myname = File.basename($PROGRAM_NAME)
 # Version string, printed by -V/--version and shown in the pager prompt.
 Version = '1.45'

Help=
<<'DOC'
= match_parens - find mismatches of various brackets and quotes

= Synopsis
match_parens [filename]	

== Options
-h		print this help and exit
-H,--help	print full documentation and exit
-V,--version	print version and exit
-l,--latex	convert ``...'' to “...” before testing
-n,--number=N	set number of mismatching characters shown to N (default: 10)
-p,--pairs=S	set matching pairs to S (default: {}[]()“”«»‘’""'')
   --test	do an internal test and exit

= Description
Mismatches of parentheses, braces, (angle) brackets, especially in TeX
sources which may be rich in those, may be difficult to trace. This little
script helps you by writing your text to standard output, after adding a
left margin to your text, which will normally be almost empty, but will
clearly show up to 10 mismatches. (Just try me on myself to see that the
parenthesis starting this sentence will not appear to be matched at the end
of the file. If you look at me in the vim editor, then select this
paragraph and try the command: |:!%|.

By default, the following pairs are tested:
()	round brackets or parentheses
{}	curly brackets or braces
[]	square brackets
<>	angle brackets (within html text only)
""	ASCII double quotes
“”	Unicode double quotation marks
''	ASCII single quotes
‘’	Unicode single quotation marks

The exit value of the script is 0 when there are no mismatches, 1 otherwise.

Angle brackets are only looked for inside HTML text, where HTML is
supposed to start with |<html>| or |=begin rdoc| and to end with
|</html>| or |=end|.

= Options
-h	
	print short help information and exit.
-H,--help	
	print full documentation via less and exit.
-V,--version	
	print this script's version and exit.
-l,--latex	
	convert |``...''| to |“...”| before testing.
-n,--number=N	
	Set number of mismatching characters shown to N. By default, only the
	first 10 are shown.
-p,--pairs=S	
	Set matching pairs to S (default: |{}[]()“”''«»""‘’|). For
	example, if you want to look for mismatching ASCII single quotes
	/only/, use |--pairs="''"|.
	Or, if you want to match braces and guillemets only, use |-p '{}«»'|.
	Note that if html is detected in your text, |<>| is automatically added
	to the pairs list. So by default, |<...>| is tested only in html, but
	you can test that in other text by specifying the |<>| pair in the
	|--pairs| option.
   --test	
	do an internal test and exit. Note that if, with the |--pairs| option,
	you specify a pairs list other than the default, the test will
	probably fail, but you can still see the effects of your pairs list on
	the test data.

= Examples
Suppose we have two files, |good| and |bad|, containing these texts:

good:	
	This is a (simple) test
	without mismatches

bad:	
	This is a (simple test
	containing mismatches

then here are some usage examples. First a simple test on these files:
   $ match_parens good
         1 |	| This is a (simple) test
         2 |	| without mismatches
   $ echo $?
   0
   $ match_parens bad
         1 | (	| This is a (simple test
         2 | (	| containing mismatches
   $ echo $?
   1

Just report if there are mismatches: 
   $ match_parens good >/dev/null && echo fine || echo problems
   fine
   $ match_parens bad >/dev/null && echo fine || echo problems
   problems

Report all tex files with mismatches in the current directory:
   $ for i in *.tex; do match_parens $i >/dev/null || echo $i; done

Matches must be in correct order:
   $ echo -e "This is a ([simple)] test\n" | match_parens
         1 ([)]	This is a ([simple)] test
         2 ([)]	

= Author
[Wybo Dekker](wybodekker@me.com)

= Copyright
Released under the [GNU General Public License](www.gnu.org/copyleft/gpl.html)
DOC

require 'optparse'
require 'ostruct'

# Show the full documentation (the Help text) in the `less` pager.
#
# The text is written directly to the pager's standard input rather than
# being interpolated into an `echo "..."` shell command: Help contains double
# quotes, backticks and `$` sequences, which a shell would strip or expand.
#
# The pager prompt is set (less -P) to "<Myname>-<Version>", with the dots of
# the version replaced by middle dots.
# NOTE(review): presumably because `.` is special in less prompt strings - confirm.
#
# If `less` cannot be started, the text is printed to standard output instead.
def help
  prompt = "-P#{Myname}-#{Version.tr('.','·')}"
  IO.popen(['less', prompt], 'w') { |pager| pager.write(Help) }
rescue Errno::ENOENT
  # no pager available: plain output is better than no documentation
  puts Help
rescue Errno::EPIPE
  # the reader quit the pager before everything was written; nothing to do
  nil
end

# Defaults for the command line options, together with the state that the
# main loop updates while reading the input:
#   number      maximum number of mismatching characters shown per line
#   input       stream that is scanned (STDIN, a named file, or DATA for --test)
#   latex       convert ``...'' to “...” before testing?
#   lineno      number of the last line read
#   s           characters that are still unmatched so far
#   html        currently inside html/rdoc text?
#   test        run the internal test?
#   parenstext  the pairs that are matched
$o = OpenStruct.new(
  number: 10,
  input: STDIN,
  latex: false,
  lineno: 0,
  s: '',
  html: false,
  test: false,
  parenstext: %q{{}[]()“”''«»""‘’}
)

# Parse the command line. Recognized options are removed from ARGV (parse!),
# so that afterwards ARGV[0], if present, is the name of the file to scan.
# The option values are stored in $o.
#
# Positional arguments of OptionParser.new: banner, summary width, summary indent.
OptionParser.new(
   <<~EOD,
      #{Myname} A find mismatches of various brackets and quotes\n
      Usage: match_parens [filename]\n
      Options:
   EOD
   15, # 2x8-1
   ''
) do |opts|

   # short help: the option summary, without the hidden -I option and
   # with the long runs of padding removed
   opts.on('-h',
      'print this help and exit'
   ) do
      puts opts.to_s.sub(/^ *-I\n/,'').gsub(/                 /,'')
      exit
   end

   opts.on('-H','--help',
      'print full documentation via less and exit'
   ) do
      help
      exit
   end

   opts.on('-V','--version',
      'print version and exit'
   ) do
      puts Version
      exit
   end

   # The description must be double-quoted: inside a single-quoted literal the
   # '' would end the string and start a second, adjacent literal, so that the
   # two quotes silently disappear from the help text.
   opts.on('-l','--latex',
     "convert ``...'' to “...” before testing"
   ) do
      $o.latex=true
   end

   opts.on('-n','--number=N',Integer,
     "set number of mismatching characters shown to N (default: #{$o.number})"
   ) do |v|
      $o.number = v
   end

   opts.on('-p','--pairs=S',String,
     "set matching pairs to S (default: #{$o.parenstext})"
   ) do |v|
      $o.parenstext = v
   end

   # internal test: scan the text after __END__ instead of a file or STDIN
   opts.on('--test',
     'do an internal test and exit'
   ) do
      $o.input = DATA
      $o.test = true
   end

   # hidden option (no description; filtered out of the -h output):
   # runs the external command `instscript` with this script's name
   opts.on('-I') do
      system("instscript #{Myname}")
      exit
   end

   opts.parse!
end

# Main part: read the input line by line and print every line behind a margin
# that shows the brackets/quotes that are still unmatched at that point.
#
# parens     the pairs list currently in force
# parenshtml the same list, extended with <> (used inside html/rdoc text)
parenshtml = $o.parenstext =~ /<>/ ? $o.parenstext : $o.parenstext + '<>'
parens = $o.parenstext

# For each pairs list, a regexp matching any adjacent opening/closing pair
# like (), {}, [], “”, <>. It is built once per list instead of once per
# input line. Regexp.escape keeps every character literal, so pairs made of
# regexp metacharacters (for example || or ..) work, too.
pair_regexp = Hash.new do |cache, pairs|
   cache[pairs] = Regexp.new(pairs.scan(/../).map { |pair| Regexp.escape(pair) }.join('|'))
end

arg = ARGV[0] || ''
unless arg.empty?
   File.exist?(arg) or raise("file #{arg} does not exist")
   File.readable?(arg) or raise("file #{arg} is not readable")
   $o.input = File.new(arg)
end

while x = $o.input.gets
   # convert LaTeX's ``...'' to “...”
   x = x.gsub(/``/,'“').gsub(/''/,'”') if $o.latex

   # only inside html text we check <>, too:
   if $o.html && (x=~/^([# ]*=end)/ || x=~/<\/html>/i)
      $o.html = false
      parens = $o.parenstext
   elsif x=~/^([# ]*=begin rdoc)/ || x=~/<html>/i
      $o.html = true
      parens = parenshtml
   end
   re = pair_regexp[parens]

   # move parens' characters into s
   $o.s << x.tr('^'+parens,'')
   # remove matches from inside, repeatedly, until no adjacent pair is left
   while $o.s.gsub!(re,'') do end
   # x still ends with its newline, so puts does not add another one;
   # slice(0, n) shows at most n characters (also for n = 0)
   puts "%4d | %-*s | %s" % [$o.lineno+=1,$o.number,$o.s.slice(0,$o.number),x]
end

if $o.test
   # the test data after __END__ deliberately end with one improper nesting
   raise("test failed! (final mismatch should be “{”})") if $o.s != '“{”}'
   puts "test succeeded"
else
   # exit value: 0 when everything matched, 1 when there are mismatches
   exit $o.s.empty? ? 0 : 1
end
__END__
This first line “{()}” has no mismatch
but here ( we have one.
It is even extended “{() with two more.

Here comes an untested angle bracket <, but here }” all) mismatches
are erased.

=begin rdoc
Inside html text now. ‘{There is an unmatched} single quote here.
A brace { is added to it here,
and an angle bracket < is added.
But > all mismatches <are>}’ cancelled here.
=end

Four mismatches start ({"'start {here} but
are cancelled '" here}).
We end here with a mismatch caused by “{(improper)”} nesting!
But the test succeeds because we expected that.
