| 1 |
## ------------------------------------------------------------------------- |
| 2 |
## $Id: String.pm,v 1.4 2003/02/21 08:37:25 joko Exp $ |
| 3 |
## ------------------------------------------------------------------------- |
| 4 |
## $Log: String.pm,v $ |
| 5 |
## Revision 1.4 2003/02/21 08:37:25 joko |
| 6 |
## minor fix |
| 7 |
## |
| 8 |
## Revision 1.3 2003/02/20 21:23:34 joko |
| 9 |
## using 'trim_space' from Pitonyak::StringUtil as a replacement for our 'trim' |
| 10 |
## |
| 11 |
## Revision 1.2 2003/01/31 01:22:19 root |
| 12 |
## + sub getLastPart |
| 13 |
## |
| 14 |
## Revision 1.1 2003/01/19 03:27:36 joko |
| 15 |
## + initial check-in |
| 16 |
## ------------------------------------------------------------------------- |
| 17 |
|
| 18 |
|
| 19 |
package Data::Mungle::Transform::String; |
| 20 |
|
| 21 |
use strict; |
| 22 |
use warnings; |
| 23 |
|
| 24 |
require Exporter; |
| 25 |
our @ISA = qw( Exporter ); |
| 26 |
our @EXPORT_OK = qw( |
| 27 |
stripHtml stripNewLines toReal getLastPart trim |
| 28 |
); |
| 29 |
|
| 30 |
|
| 31 |
use Pitonyak::StringUtil qw( trim_space ); |
| 32 |
|
| 33 |
# Strip the markup from an HTML fragment and return only its text.
#
# Parameters:
#   $html - string holding the HTML to process.
# Returns:
#   The text chunks reported by HTML::PullParser, concatenated in document
#   order. The 'text' argspec is requested, so the text is returned as it
#   appears in the source (entities such as '&nbsp;' are not decoded).
sub stripHtml {
    my $html = shift;

    # This file has no 'use HTML::PullParser', so the class is only
    # available if some other module happened to load it first. Load it on
    # demand; 'require' does nothing when the module is already loaded.
    require HTML::PullParser;

    my $p = HTML::PullParser->new(
        doc           => \$html,
        text          => 'text',
        unbroken_text => 1,
    );

    my $result = '';
    while (my $token = $p->get_token()) {
        # Each token is an array reference holding the requested values.
        $result .= join('', @{$token});
    }
    return $result;
}
| 49 |
|
| 50 |
# Compatibility wrapper: forwards all arguments unchanged to
# Pitonyak::StringUtil::trim_space and returns whatever that function
# returns, evaluated in the caller's context.
sub trim {
    return trim_space(@_);
}
| 53 |
|
| 54 |
# Remove every line-feed character ("\n") from a string.
#
# Parameters:
#   $text - the string to flatten; the caller's variable is not modified.
# Returns:
#   A copy of the input with all "\n" characters deleted. Carriage returns
#   and any other whitespace are left untouched.
sub stripNewLines {
    my ($text) = @_;
    $text =~ s{\n}{}g;
    return $text;
}
| 62 |
|
| 63 |
# Extract the first unsigned decimal number embedded in a string.
#
# Parameters:
#   $string - text that may contain a number, e.g. "price: 12.50 EUR".
# Returns:
#   The first run of digits, optionally followed by a single dot and more
#   digits (e.g. "12.50" or "5"), as a string. Returns undef when the input
#   is undefined or contains no digits. A sign or a leading dot is not part
#   of the captured value.
sub toReal {
    my $string = shift;

    # Assign from $1 only after a successful match: when a match fails, $1
    # still holds the capture of some earlier, unrelated match.
    my $real;
    if (defined $string && $string =~ m/(\d+(?:\.\d+)?)/) {
        $real = $1;
    }

    # Always return exactly one scalar so list-context callers (for example
    # hash constructors) keep receiving a single value.
    return $real;
}
| 69 |
|
| 70 |
# Return the part of a string that follows its last separator.
#
# Parameters:
#   $which     - the string to examine, e.g. a path or a package name.
#   $seperator - optional separator. It is interpolated into the pattern as
#                a regular expression fragment, so the caller must escape
#                any metacharacters. Defaults to a forward slash.
# Returns:
#   The text after the last separator that is followed by at least one
#   character, or undef when the input is undefined or nothing matches.
sub getLastPart {
    my $which     = shift;
    my $seperator = shift;

    # Fall back to the default only when no usable separator was supplied;
    # a plain truth test would also throw away the valid separator '0'.
    $seperator = '\/' unless defined $seperator && length $seperator;

    # Assign from $1 only after a successful match: when a match fails, $1
    # still holds the capture of some earlier, unrelated match.
    my $last;
    if (defined $which && $which =~ m/^.*$seperator(.+?)$/) {
        $last = $1;
    }

    # Always return exactly one scalar so list-context callers keep
    # receiving a single value.
    return $last;
}
| 77 |
|
| 78 |
1; |