/[cvs]/nfo/perl/libs/Regexp/Group.pm
ViewVC logotype

Diff of /nfo/perl/libs/Regexp/Group.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4 by joko, Thu Mar 27 15:45:00 2003 UTC revision 1.5 by joko, Tue May 13 09:10:15 2003 UTC
# Line 2  Line 2 
2  ##  $Id$  ##  $Id$
3  ## ------------------------------------------------------------------------  ## ------------------------------------------------------------------------
4  ##  $Log$  ##  $Log$
5    ##  Revision 1.5  2003/05/13 09:10:15  joko
6    ##  added documentation (pod) and comments
7    ##  now using Class::Smart as base class
8    ##  new: reporting and statistics
9    ##
10  ##  Revision 1.4  2003/03/27 15:45:00  joko  ##  Revision 1.4  2003/03/27 15:45:00  joko
11  ##  fixes to modules regarding new namespace(s) below Data::Mungle::*  ##  fixes to modules regarding new namespace(s) below Data::Mungle::*
12  ##  ##
# Line 17  Line 22 
22  ## ------------------------------------------------------------------------  ## ------------------------------------------------------------------------
23    
24    
25    =pod
26    
27    =head1 Name
28    
29      Regexp::Group
30    
31    
32    =head1 Description
33    
34      Perform a group of forward declared steps on a string
35      using regular expressions and callback event handlers.
36      
37      A "step" is a triple of a pattern, an event handler and associated metadata.
38      
39      The regular expression pattern is used to scan the payload and a corresponding
40      event handler is used to mungle the raw result of the pattern match into a more
41      self-descriptive form. The metadata information passed along gets used to do
42      right that.
43      
44      The declaration happens - package/block/topic - based.
45      The elements are linked to each other across blocks by giving them the same name.
46      
47      The block / declaration data structure are simple hashes.
48      
49      The declaration of these has to happen outside of this module, the intention of this
50      module can be pictured as the "(state) engine" which processes this "receipt".
51    
52      I don't actually remember, but *returning groups* of results (grouped records)
53      also might have been an initial aspect/intention of this module. However - there
54      has been no investigation if it actually worked out as expected.
55      
56      It works like it is. Never touch a running system.  ;-)
57    
58    
59    =head1 Todo
60    
61      o Kind of runtime introspection to give encapsulated access some innards:
62          
63          1. payload
64            $self->{data}
65          
66          2. receipt
67            $self->{metadata}
68            $self->{patterns}
69            $self->{coderefs}
70    
71    
72    =head1 Bugs
73    
74      Yes.
75    
76    
77    =cut
78    
79    
80  package Regexp::Group;  package Regexp::Group;
81    
82  use strict;  use strict;
83  use warnings;  use warnings;
84    
85  use base qw( DesignPattern::Logger );  use base qw( Class::Smart DesignPattern::Logger );
86    $Class::Smart::constructor = '_init';
87    
88    
89  use Data::Dumper;  use Data::Dumper;
   
90  use Data::Mungle::Compare::Struct qw( isEmpty );  use Data::Mungle::Compare::Struct qw( isEmpty );
91    
92  my @patterns;  # TODO/REVIEW: compile patterns? (see below...)
93    #my @patterns;
94    
 # ------------    common perl object constructor    ------------  
 sub new {  
   my $invocant = shift;  
   my $class = ref($invocant) || $invocant;  
   my @args = ();  
   @_ && (@args = @_);  
   #$logger->debug( __PACKAGE__ . "->new( @args )" );      # this is not "common"!  
   my $self = { @_ };  
   bless $self, $class;  
   $self->{caller} = caller;  
     
   #print Dumper(caller(2));  
   #exit;  
     
   $self->_init();  
   return $self;  
 }    
95    
96    # Initializer / Pseudo constructor - Does some work instead of an otherwise required constructor.
97  sub _init {  sub _init {
98    my $self = shift;    my $self = shift;
99        
100      # Just initialize once, prevent multiple redundant (false|zombie) calls to this method.
101      return if $self->{__INITIALIZED};
102      $self->{__INITIALIZED}++;
103    
104      $self->{caller} = caller;
105    
106    # TODO/REVIEW: compile patterns?    # TODO/REVIEW: compile patterns?
107    #@patterns = map{ qr/$_/ } @patterns;    #@patterns = map{ qr/$_/ } @patterns;
108        
# Line 59  sub _init { Line 110  sub _init {
110    $self->{data} = ${$self->{data}};    $self->{data} = ${$self->{data}};
111  }  }
112    
113    
114    # Kicks off processing and reports statistics from subsequential steps.
115  sub scan {  sub scan {
116    my $self = shift;    my $self = shift;
117    my $stepkeys = shift;    my $stepkeys = shift;
118    my $coderef = shift;    my $coderef = shift;
119        
120      $self->clear_report();
121      
122    #my $nodecount_default = shift;    #my $nodecount_default = shift;
123    my $nodecount_default = 19;    my $nodecount_default = 19;
124    
125  #print "SCAN", "\n";    #print "SCAN", "\n";
126  #print $self->{pattern}, "\n";    #print $self->{pattern}, "\n";
127    $self->{result} = [];    $self->{result} = [];
128    $self->{result_raw} = [];    $self->{result_raw} = [];
129    
# Line 78  sub scan { Line 133  sub scan {
133      # disables _any_ tracing - even if sub-conditions evaluate to true values      # disables _any_ tracing - even if sub-conditions evaluate to true values
134      $self->{TRACE_DISABLED} = 0;      $self->{TRACE_DISABLED} = 0;
135    
136    $self->log('starting to scan');    $self->log('--------- start ---');
137    
138    #for (1..20) {    #for (1..20) {
139        
# Line 96  sub scan { Line 151  sub scan {
151  #print "cb-2: $coderef", "\n";  #print "cb-2: $coderef", "\n";
152    
153    foreach my $stepkey_current (@{$steps}) {    foreach my $stepkey_current (@{$steps}) {
154      $self->_scan_step($stepkey_current, $coderef);      $self->{report}->{$stepkey_current} = $self->_scan_step($stepkey_current, $coderef);
155      $patterncount++;      $patterncount++;
156    }    }
157    
# Line 109  sub scan { Line 164  sub scan {
164        
165    #$self->{code}->($self->{caller}, $self);    #$self->{code}->($self->{caller}, $self);
166        
167      # new of 2003-05-09: return reference to report as response
168      #return $self->{report};
169      
170  }  }
171    
172  sub _scan_step {  sub _scan_step {
# Line 117  sub _scan_step { Line 175  sub _scan_step {
175    my $stepkey = shift;    my $stepkey = shift;
176    my $callback = shift;    my $callback = shift;
177    
178  #print "cb-3: $callback", "\n";    $self->log("Running step '$stepkey'.");
179    
180      #$self->log("running step '$stepkey'", 'debug');    # All data we need from the current instance to drive the pattern match via "gex".
181      $self->log("running step '$stepkey'");    my $dataref = $self->{data};
182      my $metadata = $self->{metadata}->{$stepkey};
183      my $dataref = $self->{data};    my $pattern = $self->{patterns}->{$stepkey};
184      my $metadata = $self->{metadata}->{$stepkey};    my $coderef = $self->{coderefs}->{$stepkey};
     my $pattern = $self->{patterns}->{$stepkey};  
     my $coderef = $self->{coderefs}->{$stepkey};  
185            
186      #print Dumper($coderef);    # Doing some assertions which lead to structure tracing on DEBUG-OUT ...
     #print Dumper($self->{data});  
     #exit;  
187    
188      $self->trace("data used", $dataref, 0, undef, { tag => '', exit => 0 });      # debug
189        #print "pattern: $pattern", "\n";
190        #exit;
191      
192        # Having all these assertions set/evaluate to true, it will dump an overview of what happens in the innards on STDOUT.
193        my $assertion = ($stepkey eq 'bet');
194        $assertion = 0;
195        $self->trace("data used", $dataref, $assertion, undef, { tag => '', exit => 0 });
196        $self->trace("pattern used", $pattern, $assertion, undef, { tag => '', exit => 0 });
197      $self->trace("metadata used", $metadata, 0, undef, { tag => '', exit => 0 });      $self->trace("metadata used", $metadata, 0, undef, { tag => '', exit => 0 });
     $self->trace("pattern used", $pattern, 0, undef, { tag => '', exit => 0 });  
198      $self->trace("coderef used", $coderef, 0, undef, { tag => '', exit => 0 });      $self->trace("coderef used", $coderef, 0, undef, { tag => '', exit => 0 });
199    
200  #print "pattern: $pattern", "\n";    # If metadata variable "noscan" is supplied, don't kick off the regex-engine, just call the coderef.
201  #exit;    # TODO: What was the intention of this? How are its mechanics working? Describe this here!
202      # FIXME: Log this event.
203      if ($metadata->{noscan} && $metadata->{noscan} == 1) {
204        $coderef->($self, undef, undef, $callback);
205        return;
206      }
207      
208      # dereference data if it's still referenced
209      my $data = ${$self->{data}};
210    
211      # if metadata variable "noscan" is supplied, don't kick off the regex-engine, just call the coderef    if (!$data) {
212      # TODO: log this event      $self->log("data is empty: stepkey=$stepkey", 'warning');
213      if ($metadata->{noscan} && $metadata->{noscan} == 1) {      return;
214        $coderef->($self, undef, undef, $callback);    }
       return;  
     }  
       
     # dereference data if its still referenced  
     my $data = ${$self->{data}};  
215    
216      if (!$data) {    
217        $self->log("data is empty: stepkey=$stepkey", 'warning');    # To detect if anything happened inside the regex after it: Could the pattern be applied?.
218        return;    my $matchcount = 0;
     }  
219    
220      #$self->{data} =~ s{    # Apply the regular expression using "gex"-options for the "Perl regular expressions"
221      $data =~ s{    # parser that's embedded in Perl-5.6.1, see "perldoc perlre".
222      
223      # This is preliminary:
224      # This code initially was written in Dec-2002 with Perl-5.6.1 on Windows using
225      # [perl, v5.6.1 built for MSWin32-x86-multi-thread], has been tested with
226      # Perl-5.8.x on Linux and seems to work fine in production on a FreeBSD machine
227      # running Perl-5.?.? since mid 2003.
228      
229      #$self->{data} =~ s{
230      $data =~ s{
231    
232        $pattern       # the pattern itself - as contained in a string - actually gets interpolated here        $pattern       # the pattern itself - as contained in a string - actually gets interpolated here
233    
234      }{    }{
235    
236          # Indicate for the followup code that the pattern matched.
237          $matchcount++;
238          
239        print "." if $self->{verbose};        print "." if $self->{verbose};
240    
241        $self->log("match in step '$stepkey'");        #$self->log("Match in step '$stepkey'!");
242    
243        # have a clean item to fill in slots        # have a clean item to fill in slots
244        my $result_item = [];        my $result_item = [];
# Line 189  sub _scan_step { Line 264  sub _scan_step {
264    
265    
266        # 2. remember built result (raw) in object - grouping functionality for items        # 2. remember built result (raw) in object - grouping functionality for items
267          $self->log("pushing raw result");          #$self->log("pushing raw result");
268          push @{$self->{result_raw}}, $result_item;          push @{$self->{result_raw}}, $result_item;
269        
270                
# Line 225  sub _scan_step { Line 300  sub _scan_step {
300    
301    
302        # 3. call coderef to get _processed_ result if given (callback)        # 3. call coderef to get _processed_ result if given (callback)
303          $self->log("calling coderef '$coderef'");          $self->log("Match!!! Calling event handler '$coderef'.");
304          if ($coderef) {          if ($coderef) {
305            my $result_processed = {};            my $result_processed = {};
306            if ($coderef->($self, $result_item, $result_processed, $callback)) {            if ($coderef->($self, $result_item, $result_processed, $callback)) {
# Line 239  sub _scan_step { Line 314  sub _scan_step {
314            # TODO: croak 'no coderef';            # TODO: croak 'no coderef';
315          }          }
316    
317        # pass back matched content if we are not the ones to delete something (pass-back-if-not-remover)        # Pass back matched content if we are not the ones to delete something (pass-back-if-not-remover)
318        # with this mechanism, we can iterate over the same content again and again and don't loose data while doing this        # With this mechanism, we can iterate over the same content again and again and don't loose data while doing this.
319        # without this, multiple scan stepping would not be possible since each step would either remove its        # Without this, multiple scan stepping would not be possible since each step would either remove its
320        # matched content - or not: no flexibility.        # matched content - or not: no flexibility.
321        # as it seems we need flexibility at any point ;-( in order not to break our necks with code-maintenance ;-)        # As it seems we need flexibility at any point ;-( in order not to break our necks with code-maintenance ;-)
322        # this also can get configured here (this flag is contained in the metadata-part of your filter-declaration)        # this also can get configured here (this flag is contained in the metadata-part of your filter-declaration).
323        $& if !$metadata->{remover};        $& if !$metadata->{remover};
324    
325      }xge;    }gex;
     
326    
327      #$self->{data_rest} = \$data;    # What to do with the payload?
328      $self->{data} = \$data;    #$self->{data_rest} = \$data;
329      $self->{data} = \$data;
330    
331      #$self->trace('matches', $self->getMatches(), 1);    #$self->trace('matches', $self->getMatches(), 1);
332      #$self->trace('matches-positions', $self->getMatchPositions(), 1);    #$self->trace('matches-positions', $self->getMatchPositions(), 1);
333    
334      #print "\r" if $self->{verbose};    # Try to fake some kinda progress bar on STDOUT.
335      print " " x 50 if $self->{verbose};    #print "\r" if $self->{verbose};
336      print "\r" if $self->{verbose};    print STDOUT " " x 50 if $self->{verbose};
337      print STDOUT "\r" if $self->{verbose};
338    
339      # new of 2003-05-08: Now we have a return value we can also send to debug output handler.
340      $self->log("No Match for step '$stepkey'!", 'debug') if not defined $matchcount or $matchcount == 0;
341      
342      # Indicate if regex for current step could be applied.
343      return $matchcount;
344  }  }
345    
346  sub continue {  sub continue {
# Line 309  sub getMatchPosition { Line 390  sub getMatchPosition {
390    return $self->{matches}->{position};    return $self->{matches}->{position};
391  }  }
392    
393    sub get_report {
394      my $self = shift;
395      return $self->{report};
396    }
397    
398    sub clear_report {
399      my $self = shift;
400      delete $self->{report};
401    }
402    
403  1;  1;
404    __END__

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.5

MailToCvsAdmin">MailToCvsAdmin
ViewVC Help
Powered by ViewVC 1.1.26 RSS 2.0 feed