#!/bin/sh
exec ruby -w -x $0 ${1+"$@"} # -*- ruby -*-
#!ruby -w
# vim: set filetype=ruby : set sw=2

# An extended grep, with extended functionality including full regular
# expressions, contextual output, highlighting, detection and exclusion of
# nontext files, and complex matching criteria.

# $Id: glark,v 1.83 2004/12/20 15:00:58 jeugenepace Exp $

require "English"
require 'singleton'

# Put both standard streams into sync mode, so each write is flushed at once.
$stdout.sync = true             # unbuffer
$stderr.sync = true             # unbuffer

# Program name and release number, kept in globals.
$PACKAGE = "glark"
$VERSION = "1.7.2"
# $DEBUGGING = false


# -------------------------------------------------------
# ANSI colou?r
# -------------------------------------------------------

#     Attribute codes:
#         00=none 01=bold 04=underscore 05=blink 07=reverse 08=concealed
#     Text color codes:
#         30=black 31=red 32=green 33=yellow 34=blue 35=magenta 36=cyan 37=white
#     Background color codes:
#         40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white

module Text

  # Highlights text using either ANSI terminal codes, or HTML.

  # Note that the foreground and background sections can have modifiers
  # (attributes).
  # 
  # Examples:
  #     black
  #     blue on white
  #     bold green on yellow
  #     underscore bold magenta on cyan
  #     underscore red on cyan
  #
  # This base class parses and stores the color names. Its own highlight
  # relies on "pre" and "post", which are not defined here; both subclasses
  # below override highlight instead.
  
  class Highlighter

    VERSION = "1.0.3"

    # Every attribute and color name the highlighters know about.
    ATTRIBUTES = %w{
      none
      reset
      bold
      underscore
      underline
      blink
      reverse
      concealed
      black
      red
      green
      yellow
      blue
      magenta
      cyan
      white
      on_black
      on_red
      on_green
      on_yellow
      on_blue
      on_magenta
      on_cyan
      on_white
    }

    # Splits a description such as "bold green on yellow" into
    # [ foreground_names, background_names ]; for that input the result is
    # [ ["bold", "green"], ["on_yellow"] ]. Whatever follows "on" is kept as
    # one single name, prefixed with "on_".
    def self.parse_colors(str)
      fg, bg = str.split(/\s*\bon_?\s*/)
      fgcolors = fg ? fg.split(/\s+/) : []
      bgcolors = bg ? [ "on_" + bg ] : []
      [ fgcolors, bgcolors ]
    end

    # Stores the foreground and background name lists for later use.
    def initialize(fgcolors, bgcolors)
      @fgcolors = fgcolors
      @bgcolors = bgcolors
    end

    # Wraps str between "pre" and "post". Neither is defined in this class.
    def highlight(str)
      pre + str + post
    end

  end

  # Highlights using HTML. Fonts are highlighted using <span> tags, not <font>. 
  # Also note that reverse is translated to white on black. According to
  # http://www.w3.org/TR/REC-CSS2/syndata.html#value-def-color, valid color
  # keywords are: aqua, black, blue, fuchsia, gray, green, lime, maroon, navy,
  # olive, purple, red, silver, teal, white, and yellow. Thus, no magenta or
  # cyan, unlike ANSI. Those two names are therefore emitted as hex values
  # (see color_value).

  class HTMLHighlighter < Highlighter

    # Builds a highlighter from a description such as "bold red on white".
    def self.make(str)
      fgcolors, bgcolors = parse_colors(str)
      HTMLHighlighter.new(fgcolors, bgcolors)
    end

    def initialize(fgcolors, bgcolors)
      super(fgcolors, bgcolors)
      # Names of the tags currently open. Created here so that name_to_code
      # can be called before the first highlight.
      @stack    = []
      @precode  = nil
      @postcode = nil
    end

    # Returns str wrapped in the opening and closing tags. Both tag strings
    # are computed on the first call and reused afterwards.
    def highlight(str)
      unless @precode
        @stack    = []
        @precode  = (@fgcolors + @bgcolors).map { |color| name_to_code(color) }.join
        @postcode = name_to_code("reset")
      end
      @precode + str + @postcode
    end

    # Returns the start tag for the given name.
    
    def start_style(name)
      case name
      when "reverse"
        "<span style=\"color: white; background-color: black\">"
      when /on_(\w+)/
        "<span style=\"background-color: #{color_value($1)}\">"
      else
        "<span style=\"color: #{color_value(name)}\">"
      end
    end

    # Returns the end tag ("</span>").

    def end_style
      "</span>"
    end

    # Maps the color names missing from the CSS2 keyword list to hex values;
    # any other name is returned unchanged.
    def color_value(cname)
      case cname
      when "cyan"
        "#00FFFF"
      when "magenta"
        "#FF00FF"
      else
        cname
      end
    end

    # Returns the code for the given name. "none" and "reset" close every
    # tag opened so far, innermost first; any other name opens a tag and is
    # remembered so that it can be closed later.

    def name_to_code(name)
      if name == "none" || name == "reset"
        closers = @stack.reverse.map { |opened| closing_tag(opened) }
        @stack.clear
        return closers.join
      end

      @stack << name

      case name
      when "bold"
        "<b>"
      when "underscore", "underline"
        "<u>"
      when "blink"
        "<blink>"
      when "concealed"
        "<!-- "
      else
        start_style(name)
      end
    end

    private

    # Returns the text that closes the tag opened for the given name.
    def closing_tag(name)
      case name
      when "bold"
        "</b>"
      when "underscore", "underline"
        "</u>"
      when "blink"
        "</blink>"
      when "concealed"
        " -->"
      else
        end_style
      end
    end

  end


  # Highlights using ANSI escape sequences.

  class ANSIHighlighter < Highlighter

    # Attribute or color name => numeric code placed in the escape sequence.
    @@ATTRIBUTES = Hash[
      'none'       => '0', 
      'reset'      => '0',
      'bold'       => '1',
      'underscore' => '4',
      'underline'  => '4',
      'blink'      => '5',
      'reverse'    => '7',
      'concealed'  => '8',
      'black'      => '30',
      'red'        => '31',
      'green'      => '32',
      'yellow'     => '33',
      'blue'       => '34',
      'magenta'    => '35',
      'cyan'       => '36',
      'white'      => '37',
      'on_black'   => '40',
      'on_red'     => '41',
      'on_green'   => '42',
      'on_yellow'  => '43',
      'on_blue'    => '44',
      'on_magenta' => '45',
      'on_cyan'    => '46',
      'on_white'   => '47',
    ]

    # Sequence appended after every highlighted string.
    RESET = "\e[0m"
    
    # Builds a highlighter from a description such as "bold red on white".
    def self.make(str)
      fgcolors, bgcolors = parse_colors(str)
      ANSIHighlighter.new(fgcolors, bgcolors)
    end

    def initialize(fgcolors, bgcolors)
      super
      @code = nil
      @reset = RESET
    end

    # Returns the escape sequence for the given name. A name missing from
    # the table produces "\e[m", since the lookup yields nil.

    def self.name_to_code(nm)
      "\e[#{@@ATTRIBUTES[nm]}m"
    end

    # Returns str preceded by the escape sequences for every stored name and
    # followed by RESET. The prefix is built once and then reused.
    def highlight(str)
      @code ||= (@fgcolors + @bgcolors).map { |color| ANSIHighlighter.name_to_code(color) }.join
      @code + str + RESET
    end

  end
  
end


# String is extended to support highlighting.

class String
  # Characters replaced by escape and escape!, with their entity text.
  # Note that '&' is not in this table and is therefore left untouched.
  @@ESCAPED = Hash[
    '\''       => '&apos;', 
    '"'        => '&quot;',
    '<'        => '&lt;',
    '>'        => '&gt;'
  ]
  @@ESCRE = Regexp.new("(" + @@ESCAPED.keys.join("|") + ")")

  # Returns an escaped copy of this string. A string is always returned,
  # even when there was nothing to escape.
  def escape
    gsub(@@ESCRE) { |x| @@ESCAPED[x] }
  end

  # Escapes this string in place. Like gsub!, it returns nil when nothing
  # was replaced, so do not rely on the return value.
  def escape!
    gsub!(@@ESCRE) { |x| @@ESCAPED[x] }
  end
end


class IO

  # not used:
  # Prints str and ends the line, emitting "<br/>" first in HTML output mode.
  def writeln(str = "")
    print str
    html_mode = GlarkOptions.instance.output == "html"
    puts(html_mode ? "<br/>" : "")
  end

  # Warnings are switched off while the built-in readlines is replaced.
  $-w = false

  # Reads the stream into an array. It works even when $/ == nil, which
  # works around a problem in Ruby 1.8.1.
  def readlines
    collected = []
    loop do
      chunk = gets
      # stop at end of stream, or on an empty read
      break if chunk.nil? || chunk.empty?
      collected << chunk
    end
    collected
  end

  $-w = true

end


# -------------------------------------------------------
# Logging
# -------------------------------------------------------

# Very minimal logging output. If verbose is set, this displays the method and
# line number whence called. It can be a mixin to a class, which displays the
# class and method from where it called. If not in a class, it displays only the
# method.

# All kids love log.
class Log

  VERSION = "1.0.1"
  
  # Severity levels, lowest first. A message is written when its level is at
  # least the logger's current level.
  module Severity
    DEBUG = 0
    INFO  = 1
    WARN  = 2
    ERROR = 3
    FATAL = 4
  end

  include Log::Severity

  # Creates a logger that writes to $stdout and starts at level FATAL.
  def initialize
    @width   = 0
    @output  = $stdout
    @fmt     = "[%s:%04d] {%s}"
    @level   = FATAL
  end
    
  # Sets the level: true selects DEBUG, false and nil select FATAL, and an
  # Integer is used as the level itself. Any other value is treated by its
  # truthiness, so the level is never left unset.
  def verbose=(v)
    @level = case v
             when TrueClass 
               DEBUG
             when FalseClass, NilClass
               FATAL
             when Integer
               v
             else
               DEBUG
             end
  end

  # Returns whether messages at DEBUG level are being written.
  def verbose
    @level <= DEBUG
  end

  def level=(lvl)
    @level = lvl
  end

  # Assigns output to the given stream.
  def output=(io)
    @output = io
  end

  # Assigns output to a file with the given name. Returns the file; client
  # is responsible for closing it.
  def outfile=(f)
    @output = if f.kind_of?(IO) then f else File.new(f, "w") end
  end

  # Creates a printf format for the given widths, for aligning output.
  def set_widths(file_width, line_width, func_width)
    @fmt = "[%#{file_width}s:%#{line_width}d] {%#{func_width}s}"
  end

  # Splits a stack frame such as "dir/file.rb:12:in `meth'" into
  # [ file, line, func ], with the directory removed from file. The method
  # name may be quoted either as `meth' or as 'meth'. func is "???" when the
  # frame has no method part, and is prefixed with "classname#" when
  # classname is given. A frame that cannot be parsed yields "???" and "0".
  def get_whence(c, classname)
    md = /(.*):(\d+)(?::in [`'](.*)')?/.match(c.to_s)
    file, line, func = md ? [ md[1], md[2], md[3] ] : [ "???", "0", nil ]
    file = file.sub(/.*\//, "")

    func ||= "???"
    
    if classname
      func = classname + "#" + func
    end
    
    [ file, line, func ]
  end

  # Logs the given message. When a block is given, its result replaces msg
  # (see print). depth selects the stack frame reported in the header.
  def log(msg = "", level = DEBUG, depth = 1, cname = nil, &blk)
    if level >= @level
      c = (caller(depth) || [])[0]
      file, line, func = get_whence(c, cname)
      print_formatted(file, line, func, msg, level, &blk)
    end
  end

  # Shows the current stack. The message is written for the first frame
  # only; the following frames show a ditto mark instead.
  def stack(msg = "", level = DEBUG, depth = 1, cname = nil, &blk)
    if level >= @level
      stk = caller(depth) || []
      stk.shift
      stk.each do |c|
        file, line, func = get_whence(c, cname)
        print_formatted(file, line, func, msg, level, &blk)
        msg = '"'
      end
    end
  end

  # Builds the "[file:line] {func}" header and writes the message after it.
  def print_formatted(file, line, func, msg, level, &blk)
    hdr = sprintf @fmt, file, line, func
    print(hdr, msg, level, &blk)
  end
  
  # Writes the header and message to the output stream. When a block is
  # given it is called: a String result is written in place of msg, and any
  # other result suppresses the line altogether.
  def print(hdr, msg, level, &blk)
    if blk
      x = blk.call
      return unless x.kind_of?(String)
      msg = x
    end
    @output.puts hdr + " " + msg.to_s.chomp
  end

  # by default, class methods delegate to a single app-wide log.

  @@log = Log.new

  def Log.verbose
    @@log.verbose
  end

  # Any value other than nil, false and 0 switches DEBUG output on.
  def Log.verbose=(v)
    @@log.verbose = (v && v != 0) ? DEBUG : FATAL
  end

  def Log.level=(lvl)
    @@log.level = lvl
  end

  # Creates a printf format for the given widths, for aligning output.
  def Log.set_widths(file_width, line_width, func_width)
    @@log.set_widths(file_width, line_width, func_width)
  end

  # Logs the given message.
  def Log.log(msg = "", level = DEBUG, depth = 1, cname = nil, &blk)
    # one more frame, to skip this delegating method
    @@log.log(msg, level, depth + 1, cname, &blk)
  end

  # Shows the current stack.
  def Log.stack(msg = "", level = DEBUG, depth = 1, cname = nil, &blk)
    @@log.stack(msg, level, depth, cname, &blk)
  end

end


# The application's log class. It adds nothing of its own to Log; the
# severity constants are mixed in directly.
class AppLog < Log
  include Severity
end


# Mixin that forwards to AppLog, passing the name of the including object's
# class so that it appears in the log header.
module Loggable

  # Logs the given message, including the class whence invoked. A block, if
  # given, is forwarded to AppLog.log.
  def log(msg = "", level = Log::DEBUG, depth = 1, &blk)
    # one more frame, to skip this method itself
    AppLog.log(msg, level, depth + 1, self.class.to_s, &blk)
  end
  
  # Shows the current stack, including the class whence invoked. A block, if
  # given, is forwarded to AppLog.stack.
  def stack(msg = "", level = Log::DEBUG, depth = 1, &blk)
    AppLog.stack(msg, level, depth + 1, self.class.to_s, &blk)
  end

end


# -------------------------------------------------------
# Input file
# -------------------------------------------------------

# A thing that can be grepped.
class InputFile
  include Loggable

  # lines holds the records read so far (split by $/); the output formats
  # use it to find out how many records there are.
  attr_reader :fname, :stati, :lines
  attr_accessor :count, :output, :invert_match

  # cross-platform end of line:   DOS  UNIX  MAC
  ANY_END_OF_LINE = Regexp.new(/(?:\r\n|\n|\r)/)

  # Status recorded for a line once it has been printed.
  WRITTEN = "written"
  
  # fname is the name shown in output; io is the stream that is read. When
  # $/ is not "\n" the whole stream is read immediately, otherwise records
  # are read one at a time by each_line.
  def initialize(fname, io)
    @fname        = fname
    @io           = io
    @stati        = Array.new      # index = line number, value = context character
    @count        = nil
    @output       = nil
    @extracted    = nil
    @regions      = nil
    @modlines     = nil
    @invert_match = false
    @linecount    = nil
    @readall      = $/ != "\n"
    # Read from the stream we were given, not by reopening fname, so that
    # standard input ("-") works as well.
    @lines        = @readall ? @io.readlines : Array.new
  end
  
  # Returns the number of records in the file. When everything was already
  # read this is the size of what was read; otherwise the file named fname
  # is read separately (which cannot work for standard input).
  def linecount
    unless @linecount
      @linecount = @readall ? @lines.size : IO.readlines(@fname).size
    end
    @linecount
  end

  # Yields each record. When reading incrementally, every record is also
  # appended to lines as it is read.
  def each_line
    if @readall
      @lines.each do |line|
        yield line
      end
    else
      while (line = @io.gets) && line.length > 0
        @lines << line
        yield line
      end
    end
  end

  # Sets the status of lines from..to to ch. A line that already has a
  # status is changed only when force is set and it has not been written.
  def set_status(from, to, ch, force = false)
    from.upto(to) do |ln|
      if (not @stati[ln]) || (@stati[ln] != WRITTEN && force)
        @stati[ln] = ch
      end
    end
  end

  # Records a match spanning start_line..end_line: either bumps the count,
  # or marks the match (":") and the surrounding context lines ("-" before,
  # "+" after). Also sets the program's exit status.
  def mark_as_match(start_line, end_line = start_line)
    if GlarkOptions.instance.output == "grep"
      end_line = start_line
    end

    GlarkOptions.instance.exit_status = invert_match ? 1 : 0

    if @count
      @count += 1
    else
      st = [0, start_line - GlarkOptions.instance.before].max
      set_status(st,           start_line - 1,                         "-")
      set_status(start_line,   end_line,                               ":",  true)
      set_status(end_line + 1, end_line + GlarkOptions.instance.after, "+")
    end
  end

  # Delegates to the output format.
  def write_matches(matching, from = nil, to = nil)
    @output.write_matches(matching, from, to)
  end

  # Delegates to the output format.
  def write_all
    @output.write_all
  end

  # Returns the lines for this file, separated by end of line sequences.
  def get_lines
    if $/ == "\n"
      @lines
    else
      unless @extracted
        # This is much easier. Just resplit the whole thing at end of line
        # sequences.
        
        eoline    = "\n"             # should be OS-dependent
        reallines = @lines.join("").split(ANY_END_OF_LINE)
        
        # "\n" after all but the last line; nothing at all for empty input
        last = reallines.pop
        @extracted = reallines.collect { |text| text + eoline }
        @extracted << last if last

        if AppLog.verbose
          @extracted.each_index do |idx|
            log "extracted[#{idx}]: #{@extracted[idx]}"
          end
        end
      end

      @extracted
    end
  end

  # Returns the given line for this file. For this method, a line ends with a
  # CR, as opposed to the "lines" method, which ends with $/.
  def get_line(lnum)
    get_lines()[lnum]
  end

  # returns the range that is represented by the region number
  def get_range(rnum)
    if $/ == "\n"
      # easy case: range is the range number, unless it is out of range.
      rnum < @lines.length ? (rnum .. rnum) : nil
    else
      unless @regions
        srclines = @modlines ? @modlines : @lines

        @regions = []           # keys = region number; values = range of lines

        lstart = 0
        srclines.each do |line|
          # each end of line sequence in the record is one more real line
          lend = lstart + line.scan(ANY_END_OF_LINE).size

          @regions << Range.new(lstart, lend - 1)

          lstart = lend
        end
      end

      @regions[rnum]
    end
  end
end

# -------------------------------------------------------
# Binary input file
# -------------------------------------------------------

# An input file whose contents are not printed: only the fact that it
# matches, or the number of matches, is reported.
class BinaryFile < InputFile

  # Writes the match count through the output format when counting;
  # otherwise prints a one-line notice. from and to are accepted for
  # compatibility with InputFile#write_matches and are not used.
  def write_matches(matching, from = nil, to = nil)
    if count
      # write_count is defined by the output formats, not by input files
      @output.write_count(matching)
    else
      puts "Binary file " + @fname + " matches"
    end
  end

end



# -------------------------------------------------------
# Output format
# -------------------------------------------------------

# Base class of the output formats: prints lines of an input file according
# to the status recorded for each of them.
class OutputFormat
  include Loggable

  attr_reader :formatted, :infile, :show_file_name
  attr_accessor :has_context

  def initialize(infile)
    @infile          = infile
    # names are shown only for several files, or for a single directory
    several          = $files.size > 0 && ($files.size > 1 || FileTester.type($files[0]) == FileTester::DIRECTORY)
    @show_file_name  = several && GlarkOptions.instance.show_file_names
    @formatted       = []
    @has_context     = false
  end

  # Prints the line, which is assumed to be 0-indexed, and is thus adjusted by
  # one.
  def print_line_number(lnum)
    printf "%5d ", lnum + 1
  end

  # prints the line, and adjusts for the fact that in our world, lines are
  # 0-indexed, whereas they are displayed as if 1-indexed.
  # Any error raised while printing is silently discarded.
  def print_line(lnum, ch = nil)
    AppLog.verbose && log("lnum #{lnum}, ch: '#{ch}'")
    begin
      lnums = @infile.get_range(lnum)
      AppLog.verbose && log("lnums(#{lnum}): #{lnums}")
      if lnums
        lnums.each do |ln|
          if show_line_numbers
            print_line_number(ln)
            printf "%s ", ch if ch && has_context
          end
          puts @formatted[ln] || @infile.get_line(ln)
        end
      end
    rescue StandardError
      # puts e
      # puts e.backtrace
    end
  end

  # Writes the count when counting. Otherwise, for lines from..to, prints
  # either the marked lines that have not been written yet (matching), or
  # every line that is not marked as a match (not matching).
  def write_matches(matching, from, to)
    if @infile.count
      write_count(matching)
    elsif matching
      firstline = from ? from : 0
      lastline  = to   ? to   : @infile.lines.length - 1

      (firstline .. lastline).each do |ln|
        status = @infile.stati[ln]
        next if !status || status == InputFile::WRITTEN

        # separator between two groups of context lines
        if firstline > 0 && !@infile.stati[ln - 1] && has_context && GlarkOptions.instance.show_break
          $stdout.writeln "  ---"
        end

        print_line(ln, status)

        @infile.stati[ln] = InputFile::WRITTEN
      end
    else
      firstline = from ? from : 0
      lastline  = to ? to : @infile.lines.length - 1
      (firstline .. lastline).each do |ln|
        print_line(ln) unless @infile.stati[ln] == ":"
      end
    end
  end

  # Prints every line of the input file.
  def write_all
    (0 ... @infile.lines.length).each { |ln| print_line(ln) }
  end

  # Returns the formatted version of the line if there is one, else the
  # line as read.
  def get_line_to_print(lnum)
    formatted[lnum] || infile.get_line(lnum)
  end

  def show_line_numbers
    GlarkOptions.instance.show_line_numbers
  end

end


# -------------------------------------------------------
# Glark output format
# -------------------------------------------------------

# Output in glark's own layout: an optional file name header, then the
# lines, optionally numbered and marked with their context character.
class GlarkOutputFormat < OutputFormat

  def initialize(infile)
    super
    opts = GlarkOptions.instance
    @has_context = opts.after != 0 || opts.before != 0
    @file_header_shown = false
  end

  # prints the line, and adjusts for the fact that in our world, lines are
  # 0-indexed, whereas they are displayed as if 1-indexed.
  # Any error raised while printing is silently discarded.
  def print_line(lnum, ch = nil)
    AppLog.verbose && log("lnum #{lnum}, ch: '#{ch}'")
    begin
      lnums = @infile.get_range(lnum)
      AppLog.verbose && log("lnums(#{lnum}): #{lnums}")
      if lnums
        lnums.each { |ln| println(ln, ch) }
      end
    rescue StandardError
      # puts e
      # puts e.backtrace
    end
  end

  # Prints the file name once per file, highlighted when highlighting is on
  # and followed by "<br/>" in HTML output.
  def show_file_header
    unless @file_header_shown
      opts = GlarkOptions.instance
      print(opts.highlight ? opts.file_highlight.highlight(@infile.fname) : @infile.fname)
      print "<br/>" if opts.output == "html"
      print "\n"
    end
    @file_header_shown = true
  end

  # Prints the 1-indexed line number, highlighted when a line number
  # highlighter is configured.
  def print_line_number(lnum)
    highlighter = GlarkOptions.instance.line_number_highlight
    return super unless highlighter

    digits  = (lnum + 1).to_s
    padding = " " * ([5 - digits.length, 0].max)
    print padding + " " + highlighter.highlight(digits) + " "
  end
 
  # Prints the number of matching lines, or of the remaining lines when
  # matching is false.
  def write_count(matching = true)
    ct = if matching
           @infile.count
         else
           @infile.lines.size - @infile.count
         end
    puts "    " + ct.to_s
  end

  def write_matches(matching, from = nil, to = nil)
    show_file_header if show_file_name
    super(matching, from, to)
  end

  def write_all
    show_file_header if show_file_name
    super
  end

  # Prints one line: its number if wanted, its context character if there
  # is context, then the text.
  def println(ln, ch)
    print_line_number(ln) if show_line_numbers
    printf "%s ", ch if ch && has_context
    puts get_line_to_print(ln)
  end

end


# Glark output as HTML: text is escaped, lines end with "<br/>", and padding
# uses non-breaking spaces.
class GlarkHTMLOutputFormat < GlarkOutputFormat

  # Returns the formatted line if there is one, else an escaped copy of the
  # line as read, followed by "<br/>". The line stored in the input file is
  # left unmodified.
  def get_line_to_print(lnum)
    str = formatted[lnum]
    unless str
      # escape a copy; escape! returns nil when nothing changed, so its
      # return value is not used
      str = infile.get_line(lnum).dup
      str.escape!
    end
    str + "<br/>"
  end
  
  # Prints one line: its 1-indexed number padded to five places with
  # "&nbsp;" if wanted, its context character if there is context, then the
  # text.
  def println(ln, ch)
    if show_line_numbers
      str = (ln + 1).to_s
      if str.length < 5
        str = ("&nbsp;" * (5 - str.length)) + str
      end
      # plain print: str is data, not a format string
      print str
    end
    
    if ch && has_context
      printf "%s&nbsp;", ch
    end
    
    puts get_line_to_print(ln)
  end

end

# Glark output as XML: one <match> element per printed line.
class GlarkXMLOutputFormat < GlarkHTMLOutputFormat

  # Returns the formatted line if there is one, else an escaped copy of the
  # line as read, wrapped in a <text> element. The line stored in the input
  # file is left unmodified.
  def get_line_to_print(lnum)
    str = formatted[lnum]
    unless str
      # escape a copy; escape! returns nil when nothing changed, so its
      # return value is not used
      str = infile.get_line(lnum).dup
      str.escape!
    end
    "<text>" + str + "</text>"
  end
  
  # Prints the element for one line: its 1-indexed number, its context
  # character, and its text.
  def println(ln, ch)
    # Build the text first, so that a failure prints nothing rather than a
    # half-written element.
    text = get_line_to_print(ln)
    puts "<match>"
    puts "    " + "<line>" + (ln + 1).to_s + "</line>"
    # ch is nil for lines printed without a status
    puts "    " + "<match>" +  ch.to_s + "</match>"
    puts "    " + text
    puts "</match>"
  end

end

# Plain text glark output. Adds nothing to GlarkOutputFormat; Glark#search_file
# selects it when no other output type applies.
class GlarkTextOutputFormat < GlarkOutputFormat
end

# Glark output for the "ansi" and "xterm" output types. Adds nothing to
# GlarkOutputFormat.
class GlarkANSIOutputFormat < GlarkOutputFormat
end


# -------------------------------------------------------
# Grep output format
# -------------------------------------------------------

# This matches grep, mostly. It is for running within emacs, thus,
# it does not support context or highlighting.
class GrepOutputFormat < OutputFormat

  # Prints the count, preceded by "name:" when file names are shown. The
  # count is that of matching lines, or of the remaining lines when
  # matching is false.
  def write_count(matching = true)
    if show_file_name
      print @infile.fname, ":"
    end
    total = if matching
              @infile.count
            else
              @infile.lines.length - @infile.count
            end
    puts total
  end

  # prints the line, and adjusts for the fact that in our world, lines are
  # 0-indexed, whereas they are displayed as if 1-indexed.
  # Output is "name:number: text", with the first two parts optional.
  def print_line(lnum, ch = nil)
    if show_file_name
      print @infile.fname, ":"
    end
    printf "%d: ", lnum + 1 if show_line_numbers
    print get_line_to_print(lnum)
  end

end


# -------------------------------------------------------
# File tester
# -------------------------------------------------------

# Classifies files as text, binary, directory and so on, using the file
# extension where it is known and a sample of the contents otherwise.
class FileTester 
  include Loggable

  BINARY     = "binary"
  DIRECTORY  = "directory"
  NONE       = "none"
  TEXT       = "text"
  UNKNOWN    = "unknown"
  UNREADABLE = "unreadable"

  # the percentage of characters that we allow to be odd in a text file
  @@ODD_FACTOR = 0.3

  # how many bytes (characters) of a file we test
  @@TEST_LENGTH = 1024

  # extensions associated with files that are always text:
  @@KNOWN_TEXT = %w{ 
    c
    cpp
    css
    h
    f
    for
    fpp
    hpp
    html
    java
    mk
    php
    pl
    pm
    rb
    rbw
    txt
  }

  # extensions associated with files that are never text:
  @@KNOWN_NONTEXT = %w{ 
    Z
    a
    bz2
    elc
    gif
    gz
    jar
    jpeg
    jpg
    o
    obj
    pdf
    png
    ps
    tar
    zip
  }

  # Returns whether the byte value is in the 7-bit ASCII range.
  def FileTester.ascii?(c)
    # from ctype.h
    (c.to_i & ~0x7f) == 0
  end

  # Returns one of the constants above for the given path: NONE if it does
  # not exist; TEXT, BINARY or UNREADABLE for a regular file; DIRECTORY for
  # a directory; UNKNOWN for anything else.
  def FileTester.type(file)
    # File.exist? rather than File.exists?, which newer Rubies do not have
    return NONE unless File.exist?(file)

    st = File.stat(file)
    if st.file?
      if File.readable?(file)
        FileTester.text?(file) ? TEXT : BINARY
      else
        UNREADABLE
      end
    elsif st.directory?
      DIRECTORY
    else
      UNKNOWN
    end
  end

  # Registers the extension as always being text.
  def FileTester.is_text(ext)
    @@KNOWN_TEXT << ext unless @@KNOWN_TEXT.include?(ext)
    @@KNOWN_NONTEXT.delete(ext)
  end

  # Registers the extension as never being text.
  def FileTester.is_nontext(ext)
    @@KNOWN_NONTEXT << ext unless @@KNOWN_NONTEXT.include?(ext)
    @@KNOWN_TEXT.delete(ext)
  end

  def FileTester.text_extensions
    @@KNOWN_TEXT
  end

  def FileTester.nontext_extensions
    @@KNOWN_NONTEXT
  end

  # Returns whether the file looks like text. A known extension decides at
  # once. Otherwise the first @@TEST_LENGTH bytes are examined: a null byte
  # means not text, and the share of non-ASCII bytes is compared with
  # @@ODD_FACTOR. For an empty file this returns UNKNOWN, which is a true
  # value, so FileTester.type reports such a file as TEXT.
  def FileTester.text?(file)
    # Don't waste our time if it doesn't even exist:
    return false unless File.exist?(file)
    
    if file.index(/\.(\w+)\s*$/)
      suffix = $1
      return true  if @@KNOWN_TEXT.include?(suffix)
      return false if @@KNOWN_NONTEXT.include?(suffix)
    end
    
    ntested = 0
    nodd = 0

    AppLog.verbose && Log.log("reading #{file}")

    File.open(file) do |f|
      buf = f.read(@@TEST_LENGTH)
      if buf
        AppLog.verbose && Log.log("got buf; length = #{buf.length}")

        buf.each_byte do |ch|
          ntested += 1

          # never allow null in a text file
          return false if ch.to_i == 0
          
          nodd += 1 unless FileTester.ascii?(ch)
        end
      else
        # file had length of 0:
        return UNKNOWN
      end
    end
    FileTester.summary(nodd, ntested)
  end

  # Returns whether the number of odd bytes is below the allowed share of
  # the bytes tested.
  def FileTester.summary(nodd, ntested)
    nodd < ntested * @@ODD_FACTOR
  end

end



# -------------------------------------------------------
# Glark
# -------------------------------------------------------

# The main processor.
# The main processor.
class Glark 
  include Loggable
  
  # func is the function object (expression) applied to every input.
  def initialize(func)
    @func = func
  end

  # Attaches the output format selected by the options to the input,
  # applies the counting and inversion options, and runs the expression
  # against it.
  def search_file(input)
    outclass = case GlarkOptions.instance.output
               when "grep"
                 GrepOutputFormat
               when "html"
                 GlarkHTMLOutputFormat
               when "xml"
                 GlarkXMLOutputFormat
               when "ansi", "xterm"
                 GlarkANSIOutputFormat
               else
                 GlarkTextOutputFormat
               end
    
    output       = outclass.new(input)
    input.output = output

    input.count        = 0    if GlarkOptions.instance.count
    input.invert_match = true if GlarkOptions.instance.invert_match
    
    @func.process(input)
  end

  # Searches the named file as text; "-" means standard input. A file opened
  # here is closed again once the search is done.
  def search_text_file(fname)
    AppLog.verbose && log("searching #{fname} for #{@func}")
    if fname == "-"
      search_file(InputFile.new(fname, $stdin))
    else
      File.open(fname) do |io|
        search_file(InputFile.new(fname, io))
      end
    end
  end

  # Writes the message to standard error, unless the quiet option is set.
  def blather(msg)
    $stderr.puts(msg) unless GlarkOptions.instance.quiet
  end

  # Searches the named file as binary. The file is closed again once the
  # search is done.
  def search_binary_file(fname)
    AppLog.verbose && log("searching binary file #{fname} for #{@func}")
    File.open(fname) do |f|
      f.binmode                # for MSDOS/WinWhatever
      search_file(BinaryFile.new(fname, f))
    end
  end

  # Returns whether the file is excluded by the basename or fullname
  # options: an option that is set must match for the file to be searched.
  def skipped?(fname)
    ((GlarkOptions.instance.basename && !GlarkOptions.instance.basename.match(File.basename(fname))) ||
     (GlarkOptions.instance.fullname && !GlarkOptions.instance.fullname.match(fname)))
  end

  # Returns the lines of the file, or nil when it cannot be read; in that
  # case the problem is reported unless the quiet option is set.
  def read_file(fname, ftype)
    begin
      IO.readlines(fname)
    rescue => e
      AppLog.verbose && log("problems reading #{ftype} file #{fname} #{e}")
      blather "problems reading #{ftype} file #{fname} #{e}"
      nil
    end
  end
  
  # Searches the named file or directory, or standard input for "-",
  # according to its type and to the binary_files and directory options.
  def search(name)
    AppLog.verbose && log("searching #{name} for #{@func}")

    if GlarkOptions.instance.exclude_matching
      expr = GlarkOptions.instance.expr
      if expr.respond_to?(:re) && expr.re.match(name)
        AppLog.verbose && log("skipping file #{name} with matching name")
        return
      else
        AppLog.verbose && log("not skipping file #{name}")
      end
    end
        
    if name == "-" 
      AppLog.verbose && log("reading standard input...")
      blather "reading standard input..."
      search_text_file("-")
    else
      type = FileTester.type(name)
      case type
      when FileTester::TEXT
        if skipped?(name)
          AppLog.verbose && log("skipping file: #{name}")
        else
          AppLog.verbose && log("searching text")
          search_text_file(name)
        end
      when FileTester::BINARY
        if skipped?(name)
          AppLog.verbose && log("skipping file: #{name}")
        else
          AppLog.verbose && log("handling binary")
          
          case GlarkOptions.instance.binary_files
          when "without-match"
            AppLog.verbose && log("skipping binary file #{name}")
            
          when "binary"
            search_binary_file(name)
            
          when "text"
            AppLog.verbose && log("processing binary file #{name} as text")
            search_text_file(name)
          end
        end
      when FileTester::UNREADABLE
        AppLog.verbose && log("skipping unreadable")
        blather "file not readable: #{name}"
      when FileTester::NONE
        AppLog.verbose && log("skipping none")
        blather "WARNING: no such file: #{name}"
      when FileTester::UNKNOWN
        AppLog.verbose && log("skipping unknown")
        blather "WARNING: unknown file type: #{name}"
      when FileTester::DIRECTORY
        AppLog.verbose && log("processing directory")
        case GlarkOptions.instance.directory
        when "read"
          AppLog.verbose && log("directory: #{GlarkOptions.instance.directory}")
          blather "glark: #{name}: Is a directory"
        when "recurse"
          AppLog.verbose && log("recursing into directory #{name}")
          begin
            entries = Dir.entries(name).reject { |x| x == "." || x == ".." }
            entries.each do |entry|
              search(name + "/" + entry)
            end
          rescue Errno::EACCES
            blather "WARNING: directory not readable: #{name}"
          end
        when "skip"
          AppLog.verbose && log("skipping directory #{name}")
        else
          AppLog.verbose && log("directory: #{GlarkOptions.instance.directory}")
        end
      else
        print "unknown type #{type}"
      end
    end
  end
end


# -------------------------------------------------------
# Env
# -------------------------------------------------------

# Environment helpers: finding the home directory (on both Unix and Windows)
# and splitting environment variables that contain quoted values.

module Env

  # Returns the home directory: $HOME when it is set, otherwise HOMEDRIVE
  # followed by HOMEPATH (a missing drive becomes "", a missing path "\").
  # Returns nil when none of the three is set.
  def Env.home_directory
    if hm = ENV["HOME"]
      hm
    else
      hd = ENV["HOMEDRIVE"]
      hp = ENV["HOMEPATH"]
      if hd || hp
        (hd || "") + (hp || "\\")
      else
        nil
      end
    end
  end

  # matches single and double quoted strings:
  REGEXP = /                    # either:
              (["'])            #     start with a quote, and save it (group 1)
              (                 #     save this (group 2)
                (?:             #         either (and do not save this):
                    \\.         #             any escaped character
                  |             #         or
                    (?!\1)[^\\] #             anything but the opening quote or a backslash
                )*              #         as many as we can get
              )                 #         end of group 2
              \1                #     end with the same quote we started with
            |                   # or
              (\S+)             #     plain old nonwhitespace (group 3)
           /x
      
  # The quote is excluded with a lookahead, because a backreference has no
  # such meaning inside a character class.
  
  
  # reads the environment variable, splitting it according to its quoting.
  # Quoted values are returned without their quotes, with escapes left as
  # written. Returns an empty array when the variable is not set.
  def Env.split(varname)
    if v = ENV[varname]
      v.scan(REGEXP).collect { |x| x[1] || x[2] }
    else
      []
    end
  end

end


# -------------------------------------------------------
# Function Object
# -------------------------------------------------------

# A function object, which can be applied (processed) against an InputFile.
class FuncObj
  
  attr_accessor :match_line_number, :file, :matches, :invert_match

  def initialize
    @match_line_number = nil
    @matches           = []
    @invert_match      = GlarkOptions.instance.invert_match
  end

  # Remembers the given line number as a match.
  def add_match(lnum)
    @matches.push(lnum)
  end

  # First line of the current match.
  def start_position
    match_line_number
  end

  # Last line of the current match; the same as the first one here.
  def end_position
    start_position
  end

  # Forgets the state of the previous file and records the new file name.
  def reset_file(file)
    @match_line_number = nil
    @file              = file
    @matches           = []
  end

  # Converts a range option into a number: nil stays nil, "N%" becomes that
  # percentage of the file's line count, and anything else is read as a
  # float.
  def range(var, infile)
    return nil unless var

    if var.index(/([\.\d]+)%/)
      total = infile.linecount
      total * $1.to_f / 100
    else
      var.to_f
    end
  end

  # Runs this expression against every line of infile, then writes the
  # result according to the options: the file name only, the matching (or
  # non-matching) lines, or the whole file.
  def process(infile)
    reset_file(infile.fname)

    @opts = GlarkOptions.instance
    
    first = range(@opts.range_start, infile)
    last  = range(@opts.range_end,   infile)

    show_as_found = !@opts.file_names_only && @opts.filter && !@opts.count

    next_unwritten = 0
    hits = 0
    lnum = 0
    infile.each_line do |line|
      in_range = (!first || lnum >= first) && (!last || lnum <= last)

      if in_range && evaluate(line, lnum, infile)
        mark_as_match(infile)
        hits += 1
        
        if show_as_found
          infile.write_matches(!invert_match, next_unwritten, lnum)
          next_unwritten = lnum + 1
        elsif @opts.file_names_only
          # we don't need to match more than once
          break
        end
        
        # stop once the match limit has been reached
        break if @opts.match_limit && hits >= @opts.match_limit
      end
      lnum += 1
    end

    matched = hits > 0
    
    if @opts.file_names_only
      if matched != invert_match
        print infile.fname
        print(@opts.write_null ? "\0" : "\n")
      end
    elsif @opts.filter
      if invert_match
        infile.write_matches(false, 0, lnum)
      elsif matched
        infile.write_matches(true, 0, lnum)
      end
    else
      infile.write_all
    end
  end

  # Marks the current match in the input file.
  def mark_as_match(infile)
    infile.mark_as_match(start_position)
  end

  def to_s
    inspect
  end
  
end


# -------------------------------------------------------
# Regular expression function object
# -------------------------------------------------------

# Applies a regular expression against an InputFile.
class RegexpFuncObj < FuncObj
  include Loggable

  attr_reader :re

  # Number of expressions created so far in "multi" highlight mode; used to
  # give each expression its own entry in the list of text highlights.
  @@count = 0

  # +re+ is the regular expression (or NegatedRegexp) applied to each line.
  def initialize(re)
    @re      = re
    @file    = nil
    @opts    = GlarkOptions.instance
    @hlidx   = highlight_index
    super()
  end

  def inspect
    @re.inspect
  end

  # Returns what the expression's +match+ returns for the line: a MatchData
  # or nil for a Regexp, true or false for a NegatedRegexp.
  def match?(line)
    @re.match(line)
  end

  # Applies the expression to +line+, which is line number +lnum+ of +file+.
  #
  # On a match the line is optionally reduced to the matched text
  # (extract-matches), escaped for xml/html output, highlighted, and
  # recorded as a match. Returns true on a match, false otherwise.
  def evaluate(line, lnum, file)
    AppLog.verbose && log("evaluating <<<#{line[0 .. -2]}>>>")
    md = match?(line)
    return false unless md

    AppLog.verbose && log("matched")
    if @opts.extract_matches
      if md.kind_of?(MatchData)
        # md[-1] is the last capture group (or the whole match when the
        # expression has no groups). The last group is nil when it took no
        # part in the match, e.g. /(a)|(b)/ against "a"; fall back to the
        # whole match rather than failing on nil + "\n".
        extracted = md[-1] || md[0]
        line.replace(extracted + "\n")
      else
        AppLog.verbose && log("--not does not work with -v")
      end
    else
      AppLog.verbose && log("NOT replacing line")
    end

    @match_line_number = lnum

    # HTML escaping
    if (@opts.output == "xml" || @opts.output == "html") && !(file.output.formatted[lnum])
      str = file.get_line(lnum)
      file.output.formatted[lnum] = str.escape
    end

    # highlight what the regular expression matched
    highlight_matched_lines(lnum, file) if @opts.highlight

    add_match(lnum)
    true
  end
  
  def explain(level = 0)
    " " * level + to_s + "\n"
  end

  private

  # Chooses the index into the text highlights for this expression: the
  # next one in rotation in "multi" mode, the first one otherwise, or nil
  # when highlighting is off or no highlights are configured.
  def highlight_index
    highlights = @opts.text_highlights
    if @opts.highlight && highlights && highlights.length > 0
      if @opts.highlight == "multi"
        idx = @@count % highlights.length
        @@count += 1
        AppLog.verbose && log("highlighting index is #{idx}")
        idx
      else
        0
      end
    else
      AppLog.verbose && log("highlighting index is nil")
      AppLog.verbose && log("@opts.highlight #{@opts.highlight}")
      AppLog.verbose && log("@opts.text_highlights.length #{highlights ? highlights.length : 0} > 0")
      nil
    end
  end

  # Rewrites the formatted form of every line in the range that +file+
  # reports for +lnum+, wrapping each occurrence of the expression in its
  # highlight.
  def highlight_matched_lines(lnum, file)
    AppLog.verbose && log("highlighting the matched expression")

    lnums = file.get_range(lnum)
    AppLog.verbose && log("lnums(#{lnum}): #{lnums}")
    return unless lnums

    lnums.each do |ln|
      str = file.output.formatted[ln] || file.get_line(ln)
      AppLog.verbose && log("file.output.formatted[#{ln}]: #{file.output.formatted[ln]}")
      AppLog.verbose && log("file.get_line(#{ln}): #{file.get_line(ln)}")
      AppLog.verbose && log("highlighting: #{str}")

      # must use the block form:
      file.output.formatted[ln] = str.gsub(@re) do |m|
        hlidx = @hlidx

        # the first capture group (of the first nine) that took part in
        # the match selects the highlight instead
        [ $1, $2, $3, $4, $5, $6, $7, $8, $9 ].each_with_index do |var, idx|
          if var
            hlidx = idx
            break
          end
        end

        # There may be no highlight for this index (no index at all, or
        # more capture groups than configured colors); in that case the
        # text is left as it is.
        highlights  = @opts.text_highlights
        highlighter = hlidx && highlights && highlights[hlidx]
        hlstr       = highlighter ? highlighter.highlight(m) : m
        AppLog.verbose && log("highlighted: #{hlstr}")
        hlstr
      end
      AppLog.verbose && log("file.output.formatted[#{ln}]: #{file.output.formatted[ln]}")
    end
  end
  
end


# -------------------------------------------------------
# Regular expression extension
# -------------------------------------------------------

# Negates the given expression.
class NegatedRegexp < Regexp

  # Inverts the outcome of Regexp#match: true when +str+ does not match the
  # pattern, false when it does.
  def match(str)
    matched = super
    matched ? false : true
  end

end

class Regexp

  # Handles negation, whole words, and ignore case (Ruby no longer supports
  # Regexp.new(/foo/i), as of 1.8).
  #
  # +pattern+ is the expression as the user wrote it, optionally in the
  # /.../ or /.../i form and optionally preceded by "!" for negation. The
  # caller's string is left untouched. Returns a NegatedRegexp when the
  # expression is negated, a Regexp otherwise.
  #
  # With +wholewords+ the pattern is wrapped in \b (a warning is written to
  # standard error when it does not appear to begin and end with a word
  # character); otherwise, with +wholelines+, it is wrapped in ^ and $.
  def Regexp.create(pattern, negated = false, ignorecase = false, wholewords = false, wholelines = false)
    # we handle a ridiculous number of possibilities here:
    #     /foobar/     -- "foobar"
    #     /foo/bar/    -- "foo", then slash, then "bar"
    #     /foo\/bar/   -- same as above
    #     /foo/bar/i   -- same as above, case insensitive
    #     /foo/bari    -- "/foo/bari" exactly
    #     /foo/bar\/i  -- "/foo/bar/i" exactly
    #     foo/bar/     -- "foo/bar/" exactly

    # work on a copy: the argument may be frozen, or still in use by the caller
    pattern = pattern.dup

    if pattern.sub!(/^!(?=\/)/, "")
      AppLog.verbose && Log.log("expression is negated")
      negated = true
    end

    # Only the /.../ and /.../i forms are unwrapped; anything else
    # (including a pattern with just a leading or just a trailing slash)
    # is used exactly as given.
    if pattern.index(/^\/(.*[^\\])\/i$/)
      pattern    = $1
      ignorecase = true
    elsif pattern.index(/^\/(.*[^\\])\/$/)
      pattern    = $1
    end
    
    if wholewords
      # sanity check:

      # match "\w", A-Za-z0-9_, 
      stword = pattern.index(/^[\[\(]*(?:\\w|\w)/)

      # The heredoc is not interpolated, so "\\w" below reaches the regexp
      # engine as an escaped backslash followed by "w".
      endword_re = Regexp.new(<<-'END_RE', Regexp::EXTENDED)
        (?:                          # the final atom is one of:
            \\w                      #   - the escape sequence "\w"
          |                          #
            \w                       #   - a literal A-Z, a-z, 0-9, or _
          |                          #
            \[[^\]]*                 #   - a character class: LB, no RB until
            (?: \\w | \w )           #     a "\w" or a literal word character,
            [^\]]*\]                 #     then anything (except RB) to the RB
        )                            #
        (?:                          # optionally followed by a quantifier:
            [*+?]                    #   - "*", "+", or "?"
          |                          #
            \{\d*(?:,\d*)?\}         #   - "{3}", "{3,4}", "{3,}", or "{,4}"
        )?                           #     (also the invalid {} and {,})
        $                            # fin
      END_RE
      endword = pattern.index(endword_re)
      errs    = [ stword ? nil : "start", endword ? nil : "end" ].compact

      if errs.length > 0
        $stderr.puts "WARNING: pattern '#{pattern}' does not " + errs.join(" and ") + " on a word boundary"
      end
      pattern = '\b' + pattern + '\b'
    elsif wholelines
      pattern = '^'  + pattern + '$'
    end
    
    reclass = negated ? NegatedRegexp : Regexp
    if ignorecase
      reclass.new(pattern, Regexp::IGNORECASE)
    else
      reclass.new(pattern)
    end
  end
end


# -------------------------------------------------------
# Compound expression function object
# -------------------------------------------------------

# Associates a pair of expressions.
class CompoundExpression < FuncObj

  # +op1+ and +op2+ are the two operand expressions.
  def initialize(op1, op2)
    @op1  = op1
    @op2  = op2
    @file = nil
    super()
  end

  # Resets both operands for the new file, then this expression itself.
  def reset_file(file)
    [ @op1, @op2 ].each { |operand| operand.reset_file(file) }
    super
  end

  # The start position is the one recorded by the most recent match.
  def start_position
    @last_start
  end
  
end


# -------------------------------------------------------
# Or expression function object
# -------------------------------------------------------

# Evaluates both expressions.
class OrExpression < CompoundExpression

  # Evaluates both operands against the line (always both, so that each
  # records its own matches) and lets the subclass's +comp+ decide whether
  # the pair of results counts as a match. Returns true or false.
  def evaluate(line, lnum, file)
    first  = @op1.evaluate(line, lnum, file)
    second = @op2.evaluate(line, lnum, file)
    return false unless comp(first, second)

    # take the positions from whichever operand matched; when both did,
    # the second operand's positions are the ones kept
    [ [ first, @op1 ], [ second, @op2 ] ].each do |matched, operand|
      next unless matched
      @last_start = operand.start_position
      @last_end   = operand.end_position
    end

    @match_line_number = lnum
    add_match(lnum)
    true
  end
  
  def inspect
    "(#{@op1.to_s} or #{@op2.to_s})"
  end

  # The end position is the one recorded by the most recent match.
  def end_position
    @last_end
  end

  # Describes the expression, indented by +level+ spaces, with the operands
  # indented four spaces further. +operator+ comes from the subclass.
  def explain(level = 0)
    indent = " " * level
    indent + "either:\n" +
      @op1.explain(level + 4) +
      indent + operator + "\n" +
      @op2.explain(level + 4)
  end
  
end


# -------------------------------------------------------
# Inclusive or expression function object
# -------------------------------------------------------

# Evaluates both expressions, and is satisfied when either return true.
class InclusiveOrExpression < OrExpression

  # Satisfied when at least one of the two results is true: yields the
  # first result when it is true, and the second result otherwise.
  def comp(m1, m2)
    m1 ? m1 : m2
  end
  
  # The word used for this operator by +explain+.
  def operator
    "or"
  end

end


# -------------------------------------------------------
# Exclusive or expression function object
# -------------------------------------------------------

# Evaluates both expressions, and is satisfied when only one returns true.
class ExclusiveOrExpression < OrExpression

  # Combines the two operand results with the ^ operator, so that for
  # true/false results exactly one of them has to be true.
  def comp(m1, m2)
    m1 ^ m2
  end

  # The word used for this operator by +explain+.
  def operator
    "xor"
  end

end


# -------------------------------------------------------
# And expression function object
# -------------------------------------------------------

# Evaluates both expressions, and is satisfied when both return true.
class AndExpression < CompoundExpression
  
  # +dist+ is the maximum number of lines allowed between the matches of
  # +op1+ and +op2+: 0 for the same line, infinity for anywhere in the file.
  def initialize(dist, op1, op2)
    @dist = dist
    super(op1, op2)
  end

  # Marks the span from the start position to the end position as a match.
  def mark_as_match(infile)
    infile.mark_as_match(start_position, end_position)
  end

  # Returns whether the most recent match of +op+ is no more than the
  # permitted distance before line +lnum+.
  #
  # (This used to subtract in the opposite order, which is never positive
  # and so held for any earlier match; the real distance check was left to
  # reset_match.)
  def match_within_distance(op, lnum)
    last = op.matches[-1]
    !last.nil? && (lnum - last <= @dist)
  end

  def inspect
    str = "("+ @op1.to_s
    if @dist == 0
      str += " same line as "
    elsif @dist.kind_of?(Float) && @dist.infinite?
      str += " same file as "
    else 
      str += " within " + @dist.to_s + " lines of "
    end
    str += @op2.to_s + ")"
    str
  end

  # Makes the most recent match of +op+ the start of this match, provided
  # that it is within the permitted distance of line +lnum+. Returns false
  # when it is too far away, and true otherwise (also when +op+ has no
  # matches at all).
  def reset_match(op, lnum)
    last = op.matches[-1]
    return true if last.nil?
    return false unless lnum - last <= @dist

    @last_start = last
    true
  end

  # Evaluates both operands against the line (always both, so that each
  # records its own matches). The pair matches when one operand matches
  # this line and the other one matched within the permitted distance.
  def match?(line, lnum, file)
    m1 = @op1.evaluate(line, lnum, file)
    m2 = @op2.evaluate(line, lnum, file)

    if m1 && match_within_distance(@op2, lnum)
      reset_match(@op2, lnum)
    elsif m2 && match_within_distance(@op1, lnum)
      reset_match(@op1, lnum)
    else
      false
    end
  end

  # The later of the two operands' end positions.
  def end_position
    [@op1.end_position, @op2.end_position].max
  end

  # Returns true when the pair of operands matches at line +lnum+.
  def evaluate(line, lnum, file)
    if match?(line, lnum, file)
      @match_line_number = lnum
      # Record the match, as the regexp and "or" expressions do. Without
      # this, an "and" expression used as an operand of another "and" never
      # has any entries for match_within_distance to find.
      # NOTE(review): add_match is defined outside this section and is
      # assumed to do no more than record the line number -- confirm.
      add_match(lnum)
      true
    else
      false
    end
  end

  # Describes the expression, indented by +level+ spaces, with the operands
  # indented four spaces further.
  def explain(level = 0)
    str = ""
    if @dist == 0
      str += " " * level + "on the same line:\n"
    elsif @dist.kind_of?(Float) && @dist.infinite?
      str += " " * level + "in the same file:\n"
    else 
      lnstr = @dist == 1 ? "line" : "lines"
      str += " " * level + "within #{@dist} #{lnstr} of each other:\n"
    end
    str += @op1.explain(level + 4)
    str += " " * level + "and\n"
    str += @op2.explain(level + 4)
    str
  end
  
end


# -------------------------------------------------------
# Expression function object creator
# -------------------------------------------------------

# Builds expression objects from command-line style arguments.
class ExpressionFactory
  include Loggable

  attr_reader :expr

  # Reads a file containing one regular expression per line. Blank lines
  # are skipped; the remaining expressions are combined with "or". Returns
  # the combined expression, or nil when the file contains none.
  def self.read_file(fname)
    AppLog.verbose && AppLog.log("reading file: #{fname}")
    expr = nil
    File.open(fname) do |file|
      file.each_line do |line|
        AppLog.verbose && AppLog.log("line: #{line}")
        line.chomp!
        unless line.empty?
          re = make_regular_expression(line)
          if expr
            expr = InclusiveOrExpression.new(expr, re)
          else
            expr = re
          end
        end
      end
    end
    AppLog.verbose && AppLog.log("returning expression #{expr}")
    expr
  end

  # Creates the function object for a single pattern, applying the
  # ignore-case, whole-word and whole-line options currently in effect.
  def self.make_regular_expression(pattern, negated = false)
    # this check is because they may have omitted the pattern, e.g.:
    #   % glark *.cpp
    # (File.exist? rather than File.exists?, which newer Rubies no longer have)
    if File.exist?(pattern) and !GlarkOptions.instance.quiet
      $stderr.print "WARNING: pattern '#{pattern}' exists as a file.\n"
      $stderr.print "    Pattern may have been omitted.\n"
    end

    regex = Regexp.create(pattern.dup, negated, GlarkOptions.instance.nocase, GlarkOptions.instance.whole_words, GlarkOptions.instance.whole_lines)
    RegexpFuncObj.new(regex)
  end

  # creates two expressions and returns them.
  def self.make_expressions(args)
    arg = args.shift
    a1 = make_expression(arg, args)

    arg = args.shift
    a2 = make_expression(arg, args)
    
    [ a1, a2 ]
  end

  # removes optional end tag
  def self.shift_end_tag(name, args)
    # explicit end tag is optional:
    args.shift if args[0] == ("--end-of-" + name)
  end
  
  # Creates a negated regular expression from the next argument. Exits
  # with status 2 when there is no argument left.
  def self.make_not_expression(args)
    # The pattern is the next argument. (It used to be the literal true,
    # with the negation flag left at its default and args never consumed.)
    pattern = args.shift
    unless pattern
      $stderr.print "ERROR: 'not' expression takes one argument\n"
      exit 2
    end

    expr = make_regular_expression(pattern, true)

    # explicit end tag is optional:
    shift_end_tag("not", args)
    expr
  end

  # Creates an "or" of the next two expressions in +args+. Exits with
  # status 2 when fewer than two are available.
  def self.make_or_expression(args)
    a1, a2 = make_expressions(args)
    unless a1 && a2
      $stderr.print "ERROR: 'or' expression takes two arguments\n"
      exit 2
    end

    shift_end_tag("or", args)
    InclusiveOrExpression.new(a1, a2)
  end

  # Creates an "xor" of the next two expressions in +args+. Exits with
  # status 2 when fewer than two are available.
  def self.make_xor_expression(args)
    a1, a2 = make_expressions(args)
    unless a1 && a2
      $stderr.print "ERROR: 'xor' expression takes two arguments\n"
      exit 2
    end

    shift_end_tag("xor", args)
    ExclusiveOrExpression.new(a1, a2)
  end

  # Creates an "and" of the next two expressions in +args+. The distance
  # is either the argument following "-a"/"--and", or the NUM in
  # "--and=NUM". Exits with status 2 when the distance is invalid or when
  # fewer than two expressions are available.
  def self.make_and_expression(arg, args)
    if arg == "-a"
      dist = args.shift
      # future version will support --and=NUM, with --and (no following =) defaulting to 0
      # elsif @current == "--and"
      # dist = "0"
    elsif arg.index(/^--and(?:=(\-?\d+))?$/)
      dist = $1 || args.shift
    end
    
    infinite = GlarkOptions.instance.infinite_distance

    # check to ensure that this is numeric
    if !dist || (dist.to_i != infinite && !dist.index(/^\d+$/))
      $stderr.print "ERROR: invalid distance for 'and' expression: '#{dist}'\n" 
      $stderr.print "    expecting an integer, or #{infinite} for 'infinite'\n" 
      exit 2
    end
    
    if dist.to_i == infinite
      dist = 1.0 / 0.0            # infinity
    else
      dist = dist.to_i
    end

    a1, a2 = make_expressions(args)
    unless a1 && a2
      $stderr.print "ERROR: 'and' expression takes two arguments\n"
      exit 2
    end

    shift_end_tag("and", args)
    AndExpression.new(dist, a1, a2)
  end

  # Creates the expression that starts at +arg+, taking any further
  # operands from +args+. Returns nil when +arg+ is nil; exits with status
  # 2 for an unknown "--" option.
  def self.make_expression(arg, args = [])
    if arg
      AppLog.verbose && AppLog.log("processing arg #{arg}")
      case arg
      when "--or", "-o"
        make_or_expression(args)
      when "--xor"
        make_xor_expression(args)
      when /^\-\-and/, /^\-a/
        make_and_expression(arg, args)
      when /^--/
        # the newline keeps the message from running into the shell prompt
        $stderr.print "option not understood: #{arg}\n"
        exit 2
      else
        # blather "assuming the last argument #{arg} is a pattern"
        make_regular_expression(arg)
      end
    else
      nil
    end
  end

end


# -------------------------------------------------------
# Help
# -------------------------------------------------------

class GlarkHelp

  # Creating an instance writes the usage summary to standard output, one
  # section at a time; each section is followed by an empty line.
  def initialize
    synopsis = [
      "Usage: glark [options] expression file...",
      "Search for expression in each file or standard input.",
      "Example: glark --and=3 'try' 'catch' *.java",
      "",
    ]

    input = [
      "Input:",
      "  -0[nnn]                        Use \\nnn as the input record separator",
      "  -d, --directories=ACTION       Process directories as read, skip, or recurse",
      "      --binary-files=TYPE        Treat binary files as TYPE",
      "      --basename, --name EXPR    Search only files with base names matching EXPR",
      "      --fullname, --path EXPR    Search only files with full names matching EXPR",
      "  -M, --exclude-matching         Ignore files with names matching the expression",
      "  -r, --recurse                  Recurse through directories",
      "",
    ]

    matching = [
      "Matching:",
      "  -a, --and=NUM EXPR1 EXPR2      Match both expressions, within NUM lines",
      "  -b, --before NUM[%]            Restrict the search to the top % or lines",
      "      --after NUM[%]             Restrict the search to after the given location",
      "  -f, --file=FILE                Use the lines in the given file as expressions",
      "  -i, --ignore-case              Ignore case for matching regular expressions",
      "  -m, --match-limit=NUM          Find only the first NUM matches in each file",
      "  -o, --or EXPR1 EXPR2           Match either of the two expressions",
      "  -R, --range NUM[%] NUM[%]      Restrict the search to the given range of lines",
      "  -v, --invert-match             Show lines not matching the expression",
      "  -w, --word, --word-regexp      Put word boundaries around each pattern",
      "  -x, --line-regexp              Select entire line matching pattern",
      "      --xor EXPR1 EXPR2          Match either expression, but not both",
      "",
    ]

    output = [
      "Output:",
      "  -A, --after-context=NUM        Print NUM lines of trailing context",
      "  -B, --before-context=NUM       Print NUM lines of leading context",
      "  -C, -NUM, --context[=NUM]      Output NUM lines of context",
      "  -c, --count                    Display only the match count per file",
      "  -F, --file-color COLOR         Specify the highlight color for file names",
      "      --no-filter                Display the entire file",
      "  -g, --grep                     Produce output like the grep default",
      "  -h, --no-filename              Do not display the names of matching files",
      "  -H, --with-filename            Display the names of matching files",
      "  -l, --files-with-matches       Print only names of matching file",
      "  -L, --files-without-match      Print only names of file not matching",
      "  -n, --line-number              Display line numbers",
      "  -N, --no-line-number           Do not display line numbers",
      "      --line-number-color COLOR  Specify the highlight color for line numbers",
      # "      --output=FORMAT            Produce output in the format (ansi, grep)",
      "  -T, --text-color COLOR         Specify the highlight color for text",
      "      --text-color-NUM COLOR     Specify the highlight color for regexp capture NUM",
      "  -u, --highlight[=FORMAT]       Enable highlighting. Format is single or multi",
      "  -U, --no-highlight             Disable highlighting",
      "  -y, --extract-matches          Display only the matching region, not the entire line",
      "  -Z, --null                     In -l mode, write file names followed by NULL",
      "",
    ]

    debugging = [
      "Debugging/Errors:",
      "      --explain                  Write the expression in a more legible format",
      "  -q, --quiet                    Suppress warnings",
      "  -Q, --no-quiet                 Enable warnings",
      "  -s, --no-messages              Suppress warnings",
      "  -V, --version                  Display version information",
      "      --verbose                  Display normally suppressed output",
      "",
    ]

    closing = [
      "See the man page for more information.",
    ]

    [ synopsis, input, matching, output, debugging, closing ].each do |section|
      section.each { |text| puts text }
    end
  end

end


# -------------------------------------------------------
# Options
# -------------------------------------------------------

class GlarkOptions
  include Loggable, Singleton

  attr_accessor :after
  attr_accessor :before
  attr_reader   :binary_files
  attr_reader   :count
  attr_reader   :directory
  attr_reader   :exclude_matching
  attr_accessor :exit_status
  attr_accessor :explain
  attr_accessor :expr
  attr_accessor :extract_matches
  attr_accessor :file_highlight
  attr_accessor :file_names_only
  attr_accessor :filter
  attr_reader   :highlight
  attr_accessor :infinite_distance
  attr_accessor :invert_match
  attr_accessor :basename
  attr_accessor :fullname
  attr_accessor :nocase
  attr_accessor :match_limit
  attr_accessor :package
  attr_accessor :local_config_files
  attr_accessor :quiet
  attr_accessor :range_end
  attr_accessor :range_start
  attr_accessor :show_file_names
  attr_accessor :show_line_numbers
  attr_accessor :line_number_highlight
  attr_accessor :text_highlights
  attr_accessor :verbose
  attr_accessor :version
  attr_accessor :whole_lines
  attr_accessor :whole_words
  attr_accessor :write_null
  attr_accessor :show_break
  attr_accessor :highlighter
  attr_accessor :output
  attr_accessor :split_as_path

  # Sets every option to its default, selects the "ansi" output style
  # (which also chooses the highlighter), and builds the default colors.
  def initialize
    @after             = 0          # lines of trailing context, displayed after the match
    @before            = 0          # lines of leading context, displayed before the match
    @binary_files      = "binary"   # how binary files are treated (see --binary-files)
    @count             = false      # just count the lines
    @directory         = "read"     # read, skip, or recurse, a la grep
    @expr              = nil        # the expression to be evaluated
    @exclude_matching  = false      # exclude files whose names match the expression
    @exit_status       = 1          # 0 == matches, 1 == no matches, 2 == error
    @explain           = false      # display a legible version of the expression
    @extract_matches   = false      # whether to show _only_ the part that matched
    @file_names_only   = false      # display only the file names
    @filter            = true       # display only matches

    @infinite_distance = -1         # signifies no limit to the distance between
                                    # matches, i.e., anywhere within the entire file is valid.

    @invert_match      = false      # display non-matching lines
    @basename          = nil        # match files with this basename
    @fullname          = nil        # match files with this full name
    @nocase            = false      # match case
    @match_limit       = nil        # the maximum number of matches to display per file
    @package           = $PACKAGE
    @local_config_files = false     # whether run also looks for a .glarkrc from the current directory upward
    @quiet             = false      # minimize warnings
    @range_end         = nil        # range to stop searching; nil => the entire file
    @range_start       = nil        # range to begin searching; nil => the entire file
    @show_line_numbers = true       # display numbers of matching lines
    @show_file_names   = true       # show the names of matching files
    @verbose           = nil        # display debugging output
    @version           = $VERSION
    @whole_lines       = false      # true means patterns must match the entire line
    @whole_words       = false      # true means all patterns are '\b'ed front and back
    @write_null        = false      # in @file_names_only mode, write '\0' instead of '\n'
    @show_break        = false      # whether to show the break between sections
    @split_as_path     = true       # whether to split arguments that include the path separator

    @highlight         = "multi"    # highlight matches (using ANSI codes)

    # these have to exist before set_output_style is called, since it
    # reads @text_highlights for its log message
    @text_highlights       = []
    @file_highlight        = nil
    @line_number_highlight = nil

    # chooses the highlighter and calls reset_colors itself
    set_output_style("ansi")

    reset_colors
  end

  # Returns the highlights used in "multi" mode, in order, made by the
  # current highlighter.
  def multi_colors
    specs = [
      "black on yellow",
      "black on green",
      "black on magenta",

      "yellow on black",
      "magenta on black",
      "green on black",
      "cyan on black",

      "blue on yellow",
      "blue on magenta",
      "blue on green",
      "blue on cyan",

      "yellow on blue",
      "magenta on blue",
      "green on blue",
      "cyan on blue",
    ]
    specs.collect { |spec| @highlighter.make(spec) }
  end

  # Returns the one-element list of highlights used in "single" mode.
  def single_color
    only = @highlighter.make("black on yellow")
    [ only ]
  end

  # Rebuilds the text, file name and line number highlights from the
  # current highlight format and highlighter.
  #
  # The formats "none", "off", "false" and "no" switch highlighting off:
  # @highlight becomes nil and every highlight is cleared. (They used to
  # leave @text_highlights as nil, on which the log statement at the end
  # of this method then failed.)
  def reset_colors
    @highlight = nil if [ "none", "off", "false", "no" ].include?(@highlight)

    if @highlight && @highlighter
      @text_highlights       = case @highlight
                               when "multi", "on", "true", "yes", true
                                 multi_colors
                               when "single"
                                 single_color
                               else
                                 $stderr.print "WARNING: highlight format '" + @highlight.to_s + "' not recognized\n"
                                 single_color
                               end
      @file_highlight        = @highlighter.make("reverse bold")
      @line_number_highlight = nil # @highlighter.make("none")
    else
      # highlighting is off, or the output style has no highlighter
      @text_highlights       = []
      @file_highlight        = nil
      @line_number_highlight = nil
    end
    
    log(sprintf("%s: %s\n", "text_highlights", @text_highlights.collect { |hl| hl.highlight("text") }.join(", ")))
  end

  # Sets the highlight format and rebuilds the colors to match it.
  def highlight=(type)
    @highlight = type
    reset_colors
  end

  # Selects the output style and the highlighter class that goes with it,
  # then rebuilds the colors.
  #
  # "html" and "xml" use the HTML highlighter, "ansi" and "xterm" the ANSI
  # one; any other style has no highlighter. The "grep" style also turns
  # off highlighting, line numbers and context, and the "text" style turns
  # off highlighting.
  def set_output_style(output)
    AppLog.verbose && log("style: #{output}")
    
    @output = output

    log(sprintf("%s: %s\n", "text_highlights", @text_highlights.collect { |hl| hl.highlight("text") }.join(", ")))

    if [ "html", "xml" ].include?(@output)
      @highlighter = Text::HTMLHighlighter
    elsif [ "ansi", "xterm" ].include?(@output)
      @highlighter = Text::ANSIHighlighter
    else
      if @output == "grep"
        @highlight         = false
        @show_line_numbers = false
        @after             = 0
        @before            = 0
      elsif @output == "text"
        @highlight         = nil
      end
      @highlighter = nil
    end

    reset_colors
  end

  # Collects the options from every source, in this order: the .glarkrc in
  # the home directory, optionally the nearest .glarkrc found from the
  # current directory upward, the GLARKOPTS environment variable, and
  # finally the command line arguments in +args+. The options are then
  # validated and, in verbose mode, logged.
  def run(args)
    @args = args
    
    AppLog.verbose && log("")

    if hd = Env.home_directory
      homerc = hd + "/.glarkrc"
      read_rcfile(homerc)
    end

    if @local_config_files
      dir = File.expand_path(".")
      AppLog.verbose && log("starting with #{dir}")
      while dir != "/" && dir != hd
        rcfile = dir + "/.glarkrc"
        AppLog.verbose && log("looking for #{rcfile}")
        # File.exist? rather than File.exists?, which newer Rubies no longer have
        if File.exist?(rcfile)
          read_rcfile(rcfile)
          break
        else
          AppLog.verbose && log("not found #{rcfile}")
          parent = File.dirname(dir)
          # a root other than "/" (such as "C:/") is its own parent; stop
          # there instead of looping forever
          break if parent == dir
          dir = parent
        end
      end
    end

    read_environment_variable

    # honor thy EMACS; go to grep mode
    set_output_style("grep") if ENV["EMACS"]

    read_options
    validate

    if @verbose
      methods.sort.each do |meth|
        # call the accessor for every setter method; method names are
        # Strings or Symbols, depending on the version of Ruby
        if meth.to_s.index(/^(\w+)=$/)
          acc = $1
          next unless respond_to?(acc)
          m = method(acc)
          AppLog.verbose && log(sprintf("%-20s: %s", acc, m.call))
        end
      end
    end
  end

  # Reads options from the configuration file +fname+; a file that does
  # not exist is ignored.
  #
  # Each line has the form "name: value" or "name = value". Anything from
  # a "#" to the end of the line is dropped, lines without both a name and
  # a value are skipped, and unknown names are ignored. Only the first
  # separator splits the line, so the value itself may contain "=" or ":".
  def read_rcfile(fname)
    AppLog.verbose && log("reading RC file: #{fname}")
    
    # File.exist? rather than File.exists?, which newer Rubies no longer have
    return unless File.exist?(fname)

    IO.readlines(fname).each do |line|
      line.sub!(/\s*#.*/, "")
      line.chomp!
      name, value = line.split(/\s*[=:]\s*/, 2)
      next unless name && value

      name  = name.strip
      value = value.strip
      next if name.empty? || value.empty?

      AppLog.verbose && log("name: #{name}; value: #{value}")
                            
      case name
      when "after-context"
        @after = value.to_i
      when "before-context"
        @before = value.to_i
      when "binary-files"
        @binary_files = value
      when "context"
        @after = @before = value == "all" ? -1 : value.to_i
      when "expression"
        # this should be more intelligent than just splitting on whitespace:
        words = value.split(/\s+/)
        # the first word is the argument to interpret, and the others are
        # the arguments that follow it, as on the command line
        @expr = ExpressionFactory.make_expression(words.shift, words)
      when "file-color"
        @file_highlight = make_highlight(name, value)
      when "filter"
        @filter = to_boolean(value)
      when "grep"
        set_output_style("grep") if to_boolean(value)
      when "highlight"
        @highlight = value
      when "ignore-case"
        @nocase = to_boolean(value)
      when "known-nontext-files"
        value.split(/\s+/).each do |ext|
          FileTester.is_nontext(ext)
        end
      when "known-text-files"
        value.split(/\s+/).each do |ext|
          FileTester.is_text(ext)
        end
      when "local-config-files"
        @local_config_files = to_boolean(value)
      when "line-number-color"
        @line_number_highlight = make_highlight(name, value)
      when "output"
        set_output_style(value)
      when "show-break"
        @show_break = to_boolean(value)
      when "quiet"
        @quiet = to_boolean(value)
      when "text-color"
        @text_highlights = [ make_highlight(name, value) ]
      when /^text\-color\-(\d+)$/
        @text_highlights[$1.to_i] = make_highlight(name, value)
      when "verbose"
        @verbose = to_boolean(value) ? 1 : nil
        AppLog.verbose = @verbose
      when "verbosity"
        @verbose = value.to_i
        AppLog.verbose = @verbose
      when "split-as-path"
        @split_as_path = to_boolean(value)
      end
    end
  end
  
  # creates a color for the given option, based on its value
  # Creates the highlight described by +value+ for the option +opt+.
  #
  # Exits with status 2 when no color was given. Returns nil when the
  # current output style has no highlighter (set_output_style leaves it
  # nil for styles such as "grep" and "text"), since there is then nothing
  # that could apply the color.
  def make_highlight(opt, value)
    unless value
      $stderr.print "ERROR: " + opt + " requires a color\n"
      exit 2
    end

    @highlighter ? @highlighter.make(value) : nil
  end

  # returns whether the value matches a true value, such as "yes", "true", or "on".
  def to_boolean(value)
    case value.downcase
    when "yes", "true", "on"
      true
    else
      false
    end
  end

  # Processes the options in the GLARKOPTS environment variable, in order.
  # Each option may consume the words that follow it.
  def read_environment_variable
    pending = Env.split("GLARKOPTS")
    AppLog.verbose && log("options: #{pending.join(', ')}")
    process_option(pending.shift, pending) until pending.empty?
  end

  # Processes the command line arguments until one of them produces the
  # expression; whatever remains in @args is left for the caller.
  #
  # When the arguments run out without an expression, the usage message is
  # written to standard error and the program exits with status 1.
  def read_options
    AppLog.verbose && log("")
    given = @args.dup

    @expr = nil
    process_option(@args.shift, @args) until @expr || @args.empty?
    return if @expr

    # A lone option of "-v" means version, if there was nothing else on the
    # command line. For grep compatibility, "-v" with an expression has to
    # mean an inverted match.
    if given == [ "-v" ]
      show_version
    elsif !given.empty?
      $stderr.print "No expression provided.\n"
    end

    $stderr.print "Usage: glark [options] expression file...\n"
    $stderr.print "Try `glark --help' for more information.\n"
    exit 1
  end

  # Interprets a single token from the command line or from GLARKOPTS.
  #
  # opt::  the token to interpret: an option, a bundle of short options
  #        (e.g. "-13wo"), or the first word of the expression.
  # args:: the tokens following +opt+. This array is modified in place: option
  #        values are shifted off its front, and the unexpanded remainder of a
  #        bundle (or a token that turned out not to be an option value) is
  #        pushed back onto it.
  #
  # Returns the expression when one was built by
  # ExpressionFactory.make_expression (or when one already existed and a
  # non-option token was seen), which means option processing is finished;
  # returns nil otherwise. Note that "-f" and "--file=" store an expression in
  # @expr but still return nil, so callers should test @expr rather than the
  # return value.
  #
  # "--help" and "--config" print their output and exit the process;
  # "-V"/"--version" calls show_version, which also exits.
  #
  # In branches that accept both a short form and a "--long[=value]" pattern,
  # $1 is nil when the short (string) form matched, so "$1 || args.shift" takes
  # the value from the next token in that case.
  def process_option(opt, args)
    AppLog.verbose && log("processing option #{opt}")
    case opt

      # record separator, as an octal character code (as with perl -0)
    when /^-0(\d{0,3})/
      AppLog.verbose && log("got record separator")
      if $1.size.zero?
        # no digits: records are separated by blank lines
        $/ = "\n\n"
      else
        begin
          $/ = $1.oct.chr
        rescue RangeError
          # out of range (e.g., 777) means nil:
          $/ = nil
        end
      end
      AppLog.verbose && log("record separator set to #{$/}")

      # after (context)
    when "-A"
      @after = args.shift.to_i
    when /^--after-context(?:=(\d+))?/
      ac = $1 ? $1 : args.shift
      @after = ac.to_i

      # before (context)
    when "-B"
      @before = args.shift.to_i
    when /^--before-context(?:=(\d+))?/
      bc = $1 ? $1 : args.shift
      @before = bc.to_i

      # after (range)
    when "--after"
      @range_start = args.shift

      # before (range)
    when "-b", "--before"
      @range_end = args.shift
    when /^--before(?:=(\d+%))/
      @range_end = $1
      
      # range
    when "-R", "--range"
      @range_start, @range_end = args.shift, args.shift

      # context
    when "-C"
      nxt = args.shift
      # Keep it only if it is entirely a number, else use the default. A token
      # that merely starts with a digit (such as the expression "2nd") is not a
      # context size and must not be swallowed.
      if nxt && nxt =~ /^\d+$/
        @before = @after = nxt.to_i
      else
        @before = @after = 2
        # Put the token back for normal processing, unless "-C" was the last
        # token, in which case there is nothing to put back (and a nil in the
        # argument list would later be processed as if it were a token).
        args.unshift(nxt) if nxt
      end
    when /^--context(?:=(\d+))?/
      @after = @before = if $1 then $1.to_i else 2 end
    when /^-([1-9]\d*)$/
      @after = @before = $1.to_i
      AppLog.verbose && log("@after = #{@after}; @before = #{@before}")

      # highlighting
    when "-u", /^--highlight(?:=(.+))?$/
      self.highlight = $1 || "multi"

    when "-U", "--no-highlight"
      @highlight = nil
      
      # version
    when "-V", "--version"
      show_version

      # verbose
    when /^--verbos(?:e|ity)(?:=(\d+))?/
      @verbose = $1 ? $1.to_i : 1
      AppLog.verbose = @verbose

    when "-v", "--invert-match"
      @invert_match = true
      @exit_status  = 0
    when "-i", "--ignore-case"
      @nocase = true

      # filter
    when "--filter"
      @filter = true

      # no filter; anchored like the other option patterns, so that a token
      # that merely contains this text is not mistaken for the option
    when /^--no-?filter/
      @filter = false

      # grep
    when "-g", "--grep"
      set_output_style("grep")

      # help
    when "-?", "--help"
      GlarkHelp.new
      exit 0

      # regexp explanation
    when "--explain"
      @explain = true

      # line numbers
    when "-N", "--no-line-number"
      @show_line_numbers = false
    when "-n", "--line-number"
      @show_line_numbers = true
    when "--line-number-color"
      @line_number_highlight = make_highlight(opt, args.shift)

      # quiet
    when "-q", "-s", "--quiet", "--messages"
      @quiet = true
    when "-Q", "-S", "--no-quiet", "--no-messages"
      @quiet = false

    when "-m", /^--match-limit(?:=(\d+))?/
      ml = $1 || args.shift
      @match_limit = ml.to_i
      
      # whole words
    when "-w", "--word", "--word-regexp"
      @whole_words = true

      # whole lines
    when "-x", "--line-regexp"
      @whole_lines = true
      
      # file names only
    when "-l", "--files-with-matches"
      @file_names_only = true
      @invert_match = false
    when  "-L", "--files-without-match"
      @file_names_only = true
      @invert_match = true

      # For selecting by file base name, like find(1) --name
    when /^--(?:basename|name)(?:=(.+))?$/
      arg = $1 || args.shift
      # Make this a regexp.  If they want globs they can use find.
      @basename = Regexp.create(arg.dup)

      # For selecting by file full name, like find(1) --path
    when /^--(?:path|fullname)(?:=(.+))?$/
      arg = $1 || args.shift
      @fullname = Regexp.create(arg.dup)

      # colors
    when "-T", /^--text-color(?:=(.+))?$/
      thl = $1 || args.shift
      # replaces the whole list of text highlights with this single one
      @text_highlights = [ make_highlight(opt, thl) ]
    when /^--text-color-(\d+)(?:=(.+))?$/
      # sets only the highlight at the given index
      idx = $1.to_i
      thl = $2 || args.shift
      @text_highlights[idx] = make_highlight(opt, thl)
    when "-F", /^--file-color(?:=(.+))?/
      fhl = $1 || args.shift
      @file_highlight = make_highlight(opt, fhl)

    when "-c", "--count"
      @count = true

    when "-Z", "--null"
      @write_null = true

    when "-M", "--exclude-matching"
      @exclude_matching = true
      
    when "-d"
      @directory = args.shift
    when /^--directories(?:=(\w+))?/
      # unlike most long options, the value is taken only from the "=value"
      # form; without it @directory becomes nil
      @directory = $1

    when "-r", "--recurse"
      @directory = "recurse"

      # "-o" and "-a" begin the expression itself
    when "-o", "-a"
      @expr = ExpressionFactory.make_expression(opt, args)
      return @expr               # we are done.

    when "-H", /^--with-?filenames?$/
      @show_file_names = true
      
    when "-h", /^--no-?filenames?$/
      @show_file_names = false
      
    when /^--binary-files?=\"?(\w+)\"?/
      @binary_files = $1
      AppLog.verbose && log("set binary_files to #{@binary_files}")

    when "-y", "--extract-matches"
      AppLog.verbose && log("set extract matches")
      @extract_matches = true

    when /^--run(?:=(\w+))?/
      @run = $1 ? $1.to_i : 2
      
    when /^--output=(\w+)/
      set_output_style($1)
      
    when /^(\-(?:[1-9]\d*|\w))(.+)/
      # handles -13wo (-13, -w, -o): the leading option is processed now, and
      # the remainder is pushed back, as a new bundle, to be processed next.

      opt, rest = $1, "-" + $2
      AppLog.verbose && log("opt, rest = #{opt}, #{rest}")
      args.unshift(rest)
      AppLog.verbose && log("args = #{args}")
      return process_option(opt, args)

      # expression read from a file
    when "-f"
      @expr = ExpressionFactory.read_file(args.shift)
      AppLog.log "expression: #{@expr}"
    when /^--file=(.+)$/
      @expr = ExpressionFactory.read_file($1)
      AppLog.log "expression: #{@expr}"

    when "--no-split-as-path"
      @split_as_path = false

    when /^--split-as-path(?:=(.+))?$/
      value = $1 || "true"
      @split_as_path = to_boolean(value)
      
      # dump the settings as they stand at this point, then quit
    when "--config"
      printf "%s: %s\n", "after", @after
      printf "%s: %s\n", "basename", @basename
      printf "%s: %s\n", "before", @before
      printf "%s: %s\n", "binary_files", @binary_files
      printf "%s: %s\n", "count", @count
      printf "%s: %s\n", "directory", @directory
      printf "%s: %s\n", "exclude_matching", @exclude_matching
      printf "%s: %s\n", "explain", @explain
      printf "%s: %s\n", "expr", @expr
      printf "%s: %s\n", "extract_matches", @extract_matches
      printf "%s: %s\n", "file_highlight", if @file_highlight then @file_highlight.highlight("filename") else "filename" end
      printf "%s: %s\n", "file_names_only", @file_names_only
      printf "%s: %s\n", "filter", @filter
      printf "%s: %s\n", "fullname", @fullname
      printf "%s: %s\n", "highlight", @highlight
      printf "%s: %s\n", "infinite_distance", @infinite_distance
      printf "%s: %s\n", "invert_match", @invert_match
      printf "%s: %s\n", "known_nontext_files", FileTester.nontext_extensions.join(", ")
      printf "%s: %s\n", "known_text_files", FileTester.text_extensions.join(", ")
      printf "%s: %s\n", "local_config_files", @local_config_files
      printf "%s: %s\n", "line_number_highlight", if @line_number_highlight then @line_number_highlight.highlight("12345") else "12345" end
      printf "%s: %s\n", "match_limit", @match_limit
      printf "%s: %s\n", "nocase", @nocase
      printf "%s: %s\n", "output", @output
      printf "%s: %s\n", "package", @package
      printf "%s: %s\n", "quiet", @quiet
      printf "%s: %s\n", "range_end", @range_end
      printf "%s: %s\n", "range_start", @range_start
      printf "%s: %s\n", "show_break", @show_break
      printf "%s: %s\n", "show_file_names", @show_file_names
      printf "%s: %s\n", "show_line_numbers", @show_line_numbers
      printf "%s: %s\n", "text_highlights", @text_highlights.collect { |hl| hl.highlight("text") }.join(", ")
      printf "%s: %s\n", "verbose", @verbose
      printf "%s: %s\n", "version", @version
      printf "%s: %s\n", "whole_lines", @whole_lines
      printf "%s: %s\n", "whole_words", @whole_words
      printf "%s: %s\n", "write_null", @write_null
      printf "%s: %s\n", "ruby version", RUBY_VERSION
      exit
      
      # the expression
    else
      AppLog.log "expression: #{@expr}"
      if @expr
        # we already have an expression, so this token is not ours to consume;
        # put it back for the caller.
        args.unshift(opt)
        return @expr
      else
        AppLog.verbose && log("not an option: #{opt}")
        if args
          @expr = ExpressionFactory.make_expression(opt, args)
          return @expr          # we are done.
        end
      end
    end
    nil                         # we're not done.
  end

  # Checks options for collisions/data validity.
  #
  # Currently this verifies only the range: when both @range_start and
  # @range_end are set, and both are percentages or neither is, the start may
  # not be greater than the end. On violation an error is written to stderr and
  # the process exits with status 2. A range mixing a percentage with a
  # non-percentage is not checked here.
  def validate
    return unless @range_start && @range_end

    percentage = /([\.\d]+)%/
    start_pct  = percentage.match(@range_start)
    end_pct    = percentage.match(@range_end)

    # one is a percentage and the other is not: the two cannot be compared
    return if start_pct.nil? != end_pct.nil?

    if start_pct
      reversed    = start_pct[1].to_f > end_pct[1].to_f
      first, last = start_pct, end_pct
    else
      reversed    = @range_start.to_i > @range_end.to_i
      first, last = @range_start, @range_end
    end

    return unless reversed

    $stderr.puts "ERROR: range start (#{first}) follows range end (#{last})"
    exit 2
  end

  # Writes the package name and version, followed by the author and license
  # lines, to standard output, and then exits with status 0.
  def show_version
    banner = @package + ", version " + @version
    puts banner,
         "Written by Jeff Pace (jpace@incava.org).",
         "Released under the Lesser GNU Public License."
    exit 0
  end
  
end


# -------------------------------------------------------
# main()
# -------------------------------------------------------

begin
  AppLog.set_widths(8, 5, -45)

  AppLog.log "loading options"
  opts = GlarkOptions.instance
  opts.run(ARGV)
  AppLog.log "done loading options"

  # Quit via abort on ctrl-C, to get rid of the annoying stack trace:
  trap("INT") { abort }

  puts opts.expr.explain if opts.explain

  glark = Glark.new(opts.expr)

  # Whatever remains in ARGV is the list of things to search; with nothing
  # left, the single name '-' is searched instead.
  $files = if ARGV.empty?
             [ '-' ]
           elsif opts.split_as_path
             # each argument may itself be a path-style list of names
             ARGV.collect { |name| name.split(File::PATH_SEPARATOR) }.flatten
           else
             ARGV
           end

  $files.each { |name| glark.search(name) }
rescue => e
  # show the message, and the stack trace only if verbose. The options are
  # looked up afresh here, because the failure may have occurred before the
  # local above was assigned.
  $stderr.puts "error: #{e}"
  raise if GlarkOptions.instance.verbose
end


__END__
# prototype of forthcoming feature:

# multi-pass execution:
./glark --run=2 '/(\w+)\s*=\s*\d+/' *.c

# means extract twice:

first run:
    matches = Array.new
    GlarkOptions.matches = matches
    glark = Glark.new(:write => false)
    glark.search($files)

second run:
    GlarkOptions.matches = nil
    expr = MultiOrExpression.new(matches)
    glark = Glark.new(ARGV, :expr => expr)
    glark.search($files)
