#!/usr/bin/perl -w

# needed modules
use WWW::Mechanize; # WWW::Mechanize for parse website
use strict;
use utf8; # hey its a german website

# Fetch the german-bash.org "toprandom" page, extract the first quote on it
# together with its date and network, and print the quote line by line
# followed by a short footer.

# mechanize handler
my $m = WWW::Mechanize->new();

# site to parse
my $url = "http://german-bash.org/action/toprandom";

# open site
# get() hands back a response object, which is true even for an HTTP error,
# so testing the bare return value never reaches the die. Depending on the
# autocheck setting get() may also throw on its own; the eval routes both
# kinds of failure to the same message.
my $response = eval { $m->get($url) };
die "[!!]\tWebsite cannot be opened - exiting!\n"
	unless $response && $response->is_success;

# write content to variable
my $res = $m->content();

# The patterns are matched in list context and without /g. In scalar context
# /g would make each search resume where the previous match stopped, and a
# failed match would leave the capture of the previous pattern in $1. Here
# every search starts at the top of the page and a miss yields undef.
my ($date) = $res =~ m{<span class="date">(.*?)</span>}s;

# [^>]* keeps the match inside the opening <a ...> tag; a greedy .* combined
# with /s could run on to a much later link in the document.
my ($net) = $res =~ m{<span class="network"><a [^>]*>(.*?)</a> </span>}s;

# extract html from the div-container we want
my ($html_text) = $res =~ m{<div class="zitat">(.*?)</div>}s;

# without the quote itself there is nothing to print
die "[!!]\tNo quote found on $url - exiting!\n" unless defined $html_text;

# date and network are only decoration for the footer, so fall back softly
$date = 'unknown' unless defined $date;
$net  = 'unknown' unless defined $net;

# every quote line lives in its own span: turn the opening tag into a line
# break and drop the closing tag
$html_text =~ s/<span class="quote_zeile">/\n/g;
$html_text =~ s/<\/span>//g;

# split the string and write to an array
my @lines = split(/\n/, $html_text);
foreach my $line (@lines) {
	$line = trim($line); # strip whitespace, decode entities, wrap long lines
	# NOTE(review): content() may already return decoded characters; confirm
	# whether this extra decode step is still required.
	utf8::decode($line);
	next if $line =~ /^$/; # jump over empty lines
	print "$line\n"; # finally print it out
}
print "\n";
print " ~~~ Found on: $net - date:  $date ~~~\n\n";

# Perl has no built-in trim(), so this helper cleans up one line of quote
# text for terminal output.
#
# Takes a single string and returns a cleaned copy:
#   - leading and trailing whitespace is stripped,
#   - the HTML entities &lt; &gt; &quot; and &amp; are decoded,
#   - every embedded line break is removed,
#   - long text is wrapped by replacing a space with newline + tab.
# An undefined argument is returned unchanged.
sub trim {
	my $string = shift;
	return $string unless defined $string;

	$string =~ s/\A\s+//; # whitespace at the beginning
	$string =~ s/\s+\z//; # whitespace at the end

	# &amp; is decoded last so that "&amp;lt;" ends up as the literal text
	# "&lt;" instead of being decoded twice into "<".
	$string =~ s/&lt;/</g;   # "<"-symbol
	$string =~ s/&gt;/>/g;   # ">"-symbol
	$string =~ s/&quot;/"/g; # "-symbol
	$string =~ s/&amp;/&/g;  # &-symbol

	# /g is required here: without it only the first line break is removed.
	$string =~ s/\n//g;

	# Wrap: once 80 characters have been consumed, the next space that
	# directly follows a word character becomes newline + tab. Lines are
	# therefore broken at or after column 80, never before it.
	$string =~ s/(.{80})\b /$1\n\t/g;

	return $string;
}

