#!/bin/bash

# findu8 - find files with invalid utf8 names
# Copyright © 2000-2009 by Pádraig Brady <P@draigBrady.com>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details,
# which is available at www.gnu.org


# Notes:
#
# This is slow as it starts an iconv process per file,
# but it mitigates this by filtering out ascii only names beforehand.

# Work out the absolute, symlink-resolved directory holding this script
# (dirname of $0, canonicalised with readlink -f), then load the helper
# file supprt/fslver that lives beneath it.
# NOTE(review): fslver presumably provides Version and shell_quote, which
# are called further down but not defined in this file -- confirm.
script_dir=$(readlink -f "$(dirname "$0")")

source "${script_dir}/supprt/fslver"

# Usage - print the help text on stdout and terminate the script.
#
# Arguments: none. The program name shown is the basename of $0.
# Outputs:   a short description plus the command synopsis, on stdout.
# Returns:   never; calls exit with no argument, so the exit status is
#            that of the echo which printed the text.
Usage() {
	# local, so the name does not leak into the global scope
	local ProgName
	ProgName=$(basename "$0")
	echo "find names with invalid UTF8 encoding.
Usage: $ProgName [[-r] [-f] path(s) ...]

If no path(s) specified then the current directory is assumed."
	exit
}

# Walk the command line once. A help or version flag is dispatched to
# Usage / Version straight away; every other argument is run through
# shell_quote and appended, space separated, to argsToPassOn.
for arg in "$@"; do
	if [[ "$arg" == "-h" || "$arg" == "--help" || "$arg" == "-help" ]]; then
		Usage
	elif [[ "$arg" == "-v" || "$arg" == "--version" ]]; then
		Version
	else
		argsToPassOn="${argsToPassOn} $(shell_quote "$arg")"
	fi
done

# Source the helper with the quoted argument list as its single parameter.
# NOTE(review): getfpf presumably sets FPF and resets the positional
# parameters that the find command below consumes -- confirm in supprt/getfpf.
source "${script_dir}/supprt/getfpf" "$argsToPassOn"

# invalid_utf8_lines - copy to stdout each stdin line that is not valid UTF-8.
#
# Input:   newline-terminated records on stdin.
# Output:  the records iconv rejects, unchanged, one per line.
# Each record is fed to its own iconv process (UTF-8 to UTF-8, output and
# diagnostics discarded); a failing iconv marks the record as invalid.
invalid_utf8_lines() {
    local name
    # IFS= and -r keep leading/trailing blanks and backslashes intact;
    # a plain `read` would strip backslashes and so corrupt the name.
    while IFS= read -r name; do
        # printf rather than echo so the name is never treated as an option
        printf '%s\n' "$name" | iconv -f UTF-8 -t UTF-8 >/dev/null 2>&1 ||
            printf '%s\n' "$name"
    done
}

# Main pipeline: list names NUL-terminated, keep only those that contain a
# byte outside printable ASCII, test those with iconv, and report the failures.
# NOTE(review): FPF and the positional parameters are presumably prepared by
# the sourced supprt/getfpf -- confirm there.
find "$@" -printf "$FPF\0" |
sort -zu | #merge files (indirectly) specified multiple times
tr '\n\0' '\1\n' | #convert \n to \1 so grep works
# LC_ALL (not LANG) so an inherited LC_ALL/LC_CTYPE cannot override the C
# locale; -a so grep never replaces matches with "Binary file ... matches".
LC_ALL=C grep -a -E '[^ -~]' | #filter out purely ASCII names
invalid_utf8_lines |
tr '\n\1' '\0\n' | #convert back from \1 to \n
# When stdout is not a pipe, show the names via ls with escapes (-b);
# when it is a pipe, pass the NUL-terminated names through untouched.
if [ ! -p /proc/self/fd/1 ]; then
    xargs -r0 ls -b1Ud --color=auto --
else
    cat
fi
