#! /usr/bin/perl

=head1 NAME

dbmerge - merge splited DocBook document

=head1 SYNOPSIS

B<dbmerge> [I<options>] I<input_file> [I<output_file>]

B<dbmerge> B<--help>

=head1 DESCRIPTION

B<dbmerge> process C<xi:include> elements (see L<http://www.w3.org/TR/xinclude/>).
That is merge splited XML file to one file.

All C<xml:base> attributes (see L<http://www.w3.org/TR/xmlbase/>) are processed
and them removed. Relative references in C<fileref> and C<url> attributes
are properly rewrited.

If no output file name is specified on command line, output is dumped to standard
output and relative references are rewrited as if output file would be in current
work directory.

=head1 OPTIONS

=over

=item B<--lo-dirs>=I<PATHS>

Coma-separated list of paths in which are located local objects (see B<LOCAL OBJECTS> below).
All paths are expanded as the standart Unix shell would do, see L<File::Glob(3perl)> for details.
Relative paths is related to curent working directory. If path contains C<$XML_BASE> at the begining
it is replaced by real base of current processing document.
Default paths are F<LocalObjects>, F<MediaObjects>, F<ImageObjects> and F<#Pictures>
in base directory of current processing document.

=item B<--lo-dirs-add>=I<PATHS>

Coma-separeted list of paths which are addeded to preiously specified or the default 
local objects paths. See B<--lo-dirs> option.

=item B<--lo-dir>=I<DIRECTORY>

Name of local objects directory for newly created documents (see B<LOCAL OBJECTS> below).
Relative path is related to newly
created documents. Default directory is F<LocalObjects>.

=item B<--lo-move>

Move local objects instead copy them. See B<LOCAL OBJECTS> below.

=item B<--xmlbase>=I<URI>

XML base URI (Uniform Resource Identifier) for input document.
See L<http://www.w3.org/TR/xmlbase/> for details.
You may need change default if you simple copy an DocBook document
without copying referenced files nor changing C<fileref> attributes.
On this case set XML base to orginal location. Note, URI is required not (unix)
file name.

=item B<-v>, B<--verbose>

Increase verbosity level.

=item B<-q>, B<--quiet>

Decrease verbosity level.

=item B<-V>, B<--version>

Dump version information and exit.

=item B<-h>, B<-?>, B<--help>

Dump help screen and exit.

=item B<--man>

Show this manual page and exit.

=back

=head1 LOCAL OBJECTS

I<Local objects> are special files (pictures, audio files, ...) in
directories specified by B<--lo-dirs> option.
If DocBook document refer (via C<fileref> attribute) to this directory, 
the files are copied (or moved) to local "Local Objects Directory"
specified by B<--lo-dir> option. 
Moreower all files with same name and differ suffix (after last dot)
are copied (or moved) too.

=head1 BUGS AND TODO

=over

=item *

There is no support for attributes C<parse> and C<encoding> in C<xi:include> element yet.

=item *

There is no support for C<xi:fallback> element yet.

=back

=head1 SEE ALSO

L<dbautosplit(3)>, 
"DocBook: The Definitive Guide" L<http://www.docbook.org/tdg/en/html/docbook.html>

=head1 AUTHOR

Martin Lazar <mlazar@suse.cz>

=cut

use strict;
use warnings;

use Getopt::Long qw(:config no_ignore_case bundling);
use XML::DOM;
use File::Basename;
use File::Path;
use File::Copy;
use URI;
use URI::WithBase;
use URI::file;
use Pod::Usage;

our $VERSION = "0.6";

## configuration, help, version and usage
#####################################################################
our @opt_lodirs = map {'$XML_BASE/' . $_} qw(LocalObjects MediaObjects ImageObjects);
our $opt_lodir = "LocalObjects";
our $opt_lomove;
our $opt_xmlbase;
our $opt_encoding;
our $opt_xidtd;
our $verbosity_level = 1; # -1 - none, 0 - only warnings, 1 - normal messages, 2 - more messages, 3 - more more ...

GetOptions(
	"lo-dirs|lo-dirs-set=s" => sub { @opt_lodirs = ( split ",", $_[1]) },
	"lo-dirs-add=s" => sub { @opt_lodirs = ( @opt_lodirs, split ",", $_[1]) },
	"lo-dir=s" => \$opt_lodir,
	"xmlbase=s" => \$opt_xmlbase,
	"lo-move!" => \$opt_lomove,
	"verbose|v+" 	=> \$verbosity_level,
	"quiet|q" 	=> sub { $verbosity_level-- },
	"help|?|h!" 	=> \&help,
	"version|V!" 	=> \&version,
	"encoding=s"	=> \$opt_encoding,
	"xi-dtd:s"	=> \$opt_xidtd,
	"man!"		=> sub { pod2usage(-verbose => 2) },
	) or usage();
usage("to many arguments") if @ARGV>2;
usage("missing input file") unless @ARGV;

sub help {
	print <<"EOHELP";
Usage: $0 [options] input_file [output_file]
Merge splitted DocBook document.

Options:
  --lo-dirs=GLOB[,...]             local objects search paths
  --lo-dir=DIRECTORY		   local objects directory
  --lo-move			   move local objects instead copy them
  
  --xmlbase=URI		 	   base URI for relative URIs
  --encoding=ENC		   enforce output encoding

  -v, --verbose			   increase verbosity level
  -q, --quiet			   decrease verbosity level
  -V, --version			   print version information
  -h, --help			   print this help
  --man				   show manual page

If output file name is not specified output file is dumped to standard
output.

Report bugs to <anicka\@suse.cz>
EOHELP
	exit 0;
}

sub version {
	(my $progname = $0) =~ s|.*/||;
	print $progname, " version ", $VERSION, " (", scalar(localtime($^T-(3600*24*-M $0))), ")\n";
	exit 0;
}

sub usage {
	print STDERR "$0: @_\n" if @_;
	print STDERR "Try `$0 --help' for more information.\n";
	exit 1;
}


our ($xmlbase, $outbase);

sub is_abs { $_[0]->path =~ m|^/| || $_[0]->scheme }

sub dom_join {
	my ($node, $base, $depth) = @_;
	my $owner = $node->getOwnerDocument;

	# accept xml:base attribute
	if ($node->getAttribute("xml:base")) {
		$base = URI::WithBase->new($node->getAttribute("xml:base"), $base->abs);
		$node->removeAttribute("xml:base");
	}


	my $fileref = $node->getAttribute("fileref");
	if ($fileref) {
	
		# copy/move local objects
		my $abs = URI::file->new($fileref)->abs($base->abs)->file;
		my @louris = map m|^\$XML_BASE/(.*)|?URI::file->new($1)->abs($base->abs)->file:$_, @opt_lodirs;
		@louris = map URI::file->new_abs($_)->file . "/", map glob, @louris;
		for (grep {substr($abs, 0, length($_)) eq $_} @louris) {
			my $dir = $opt_lodir . "/" . substr($abs, length($_));
			$node->setAttribute("fileref", $dir);
			$dir = URI::file->new(dirname $dir)->abs($outbase->abs)->file;
			mkpath $dir;
			$abs =~ s|\.[^./]*$|.*|;
			print STDERR $opt_lomove?"Move":"Copy", " local objects: ", 
				URI::file->new_abs($abs)->rel(URI::file->cwd), "\n" if $verbosity_level > 1;
			for(glob($abs)) {
				my ($dev1, $ino1) = stat $_;
				my ($dev2, $ino2) = stat $dir . "/" . basename $_;
				next if defined $dev2 && $dev1 == $dev2 && $ino1 == $ino2;
				$opt_lomove ? move($_, $dir) : copy($_, $dir) ;
			}
			goto SKIP_FIXUP_FILEREF;
		}
	
		# fixup @fileref (NOTE: it is file name, not URI)
		if ($fileref !~ m|^/|) {
			my $furi = URI::WithBase->new(URI::file->new($fileref), $base->abs);
			if (is_abs($furi) || is_abs($base)) {
				$furi = $furi->abs;
			} else {
				$furi = $furi->abs->rel($outbase->abs);
			}
			print STDERR "Fixup fileref: $fileref => ", $furi->file, "\n" if $verbosity_level > 1;
			$fileref = $furi->file;
			$node->setAttribute("fileref", $fileref);
		}
SKIP_FIXUP_FILEREF:
	}

	if ($node->getNodeName eq "xi:include") {
		my $href = $node->getAttribute("href");
		unless ($href) {
			print STDERR "Warning: undefined href attribute in xi:include element\n" if $verbosity_level>=0;
			return;
		}
		$base = URI::WithBase->new(URI::file->new($href), $base->abs);
		print STDERR "Including file: ", $base->abs->rel(URI::file->cwd), "\n" if $verbosity_level > 0;
		my $parser = new XML::DOM::Parser(KeepCDATA =>1, ParseParamEnt=>0);
		my $doc = $parser->parsefile($base->abs->file);
		dom_join($doc->getDocumentElement, $base, $depth+1);
		return;
	}


	print OUT "<", $node->getNodeName;
	my $map = $node->getAttributes;
	for(my $i=0; $i < $map->getLength; $i++){
		print OUT " ", $map->item($i)->toString;
	}
	my @children = $node->getChildNodes;
	if (@children) {
		print ">";
		# proces children
		for my $child ($node->getChildNodes) {
			if ($child->getNodeType != ELEMENT_NODE) {
				print OUT $child->toString;
				next;
			}
			dom_join($child, $base, $depth+1);
		}
		print "</", $node->getNodeName, ">";
	} else {
		print "/>";
	}
		
}


$xmlbase = URI::WithBase->new($opt_xmlbase || URI::file->new($ARGV[0]), URI::file->cwd);

XML::DOM::ignoreReadOnly(1);
my $parser = new XML::DOM::Parser(KeepCDATA => 1, ParseParamEnt => 0);
my $doc = $parser->parsefile($ARGV[0]);

my $encoding = $opt_encoding || ($doc->getXMLDecl ? $doc->getXMLDecl->getEncoding : undef) || "UTF-8";
$doc->setXMLDecl($doc->createXMLDecl("1.0")) unless $doc->getXMLDecl;
$doc->getXMLDecl->setEncoding($encoding);

*OUT = \*STDOUT;
$outbase = URI::WithBase->new(URI::file->cwd);
if ($ARGV[1]) {
	copy($ARGV[0], $ARGV[0] . "~") if $xmlbase->abs->eq(URI::file->new_abs($ARGV[1]));
	mkpath dirname $ARGV[1];
	open OUT, ">", $ARGV[1] or die "$ARGV[1]: $!\n";
	$outbase = URI::WithBase->new(URI::file->new($ARGV[1]), URI::file->cwd);
}
binmode OUT, ":utf8";

print OUT $doc->getXMLDecl->toString, "\n" if $doc->getXMLDecl;

print OUT $doc->getDoctype->toString, "\n" if $doc->getDoctype;

dom_join($doc->getDocumentElement, $xmlbase, 1);

print OUT "\n";

close OUT or die "$!\n";


