| 3 |
# $Id$ |
# $Id$ |
| 4 |
# |
# |
| 5 |
# $Log$ |
# $Log$ |
| 6 |
|
# Revision 1.2 2002/06/27 02:14:22 cvsjoko |
| 7 |
|
# + stripHtml stripSpaces stripNewLines toReal |
| 8 |
|
# |
| 9 |
# Revision 1.1 2002/06/24 14:49:59 cvsjoko |
# Revision 1.1 2002/06/24 14:49:59 cvsjoko |
| 10 |
# + new |
# + new |
| 11 |
# |
# |
| 20 |
Dumper |
Dumper |
| 21 |
md5 md5_hex md5_base64 |
md5 md5_hex md5_base64 |
| 22 |
ParseDate UnixDate |
ParseDate UnixDate |
| 23 |
|
|
| 24 |
|
stripHtml stripSpaces stripNewLines toReal |
| 25 |
); |
); |
| 26 |
|
|
| 27 |
use strict; |
use strict; |
| 33 |
$main::TZ = 'GMT'; |
$main::TZ = 'GMT'; |
| 34 |
use Date::Manip; |
use Date::Manip; |
| 35 |
|
|
| 36 |
|
require LWP::UserAgent; |
| 37 |
|
use HTML::PullParser; |
| 38 |
|
|
| 39 |
|
|
| 40 |
|
######################################## |
| 41 |
|
|
| 42 |
|
# Trim whitespace from both ends of a string.
#
# Takes one scalar and returns a copy with every leading and trailing
# whitespace character removed; whitespace inside the text is left alone.
# The caller's variable is not modified (we work on a private copy).
sub stripSpaces {
    my ($text) = @_;

    # Topicalize the copy so both substitutions operate on it via $_.
    for ($text) {
        s/^\s*//g;    # leading whitespace
        s/\s*$//g;    # trailing whitespace
    }

    return $text;
}
| 50 |
|
|
| 51 |
|
# Remove every newline ("\n") character from a string.
#
# Takes one scalar and returns a copy with all "\n" characters deleted,
# wherever they occur. Other characters, including carriage returns and
# other whitespace, are returned unchanged.
sub stripNewLines {
    # Copy the argument and strip the newlines from the copy in one step.
    (my $text = shift) =~ s/\n//g;
    return $text;
}
| 59 |
|
|
| 60 |
|
# Extract the first unsigned decimal number from a string.
#
# Takes one scalar and returns the first run of digits found in it,
# together with an optional fractional part (".digits"), as a string:
#   "price: 12.50 EUR" -> "12.50",  "7 items" -> "7"
# Returns undef when the input is undef or contains no digits.
# Signs and exponents are not interpreted.
sub toReal {
    my $string = shift;

    # Take the capture from the match's own return list instead of reading
    # $1 afterwards: after a failed match $1 still holds the value from an
    # earlier successful match, which would be returned as a bogus result.
    # The fractional part is optional so single-digit values match as well.
    my ($real) = defined $string
        ? $string =~ m/(\d+(?:\.\d+)?)/
        : ();

    return $real;
}
| 66 |
|
|
| 67 |
|
# Reduce an HTML fragment to its text content.
#
# Takes one scalar holding HTML and returns the concatenation of every
# token the parser reports. Only a 'text' handler is configured, so the
# tokens are expected to be text segments only -- tags, comments and
# declarations should not appear in the result (per HTML::PullParser's
# event selection; confirm against its documentation).
# NOTE(review): the plain 'text' argspec presumably leaves entities such
# as "&nbsp;" undecoded -- verify if decoded text is wanted.
sub stripHtml {
    my ($html) = @_;

    my $parser = HTML::PullParser->new(
        doc           => \$html,
        text          => 'text',
        unbroken_text => 1,
    );

    # Each token is an array reference; gather all of their elements and
    # glue them together once at the end.
    my @pieces;
    while (my $token = $parser->get_token()) {
        push @pieces, @{$token};
    }

    return join('', @pieces);
}
| 83 |
|
|
| 84 |
1; |
1; |