/[cvs]/nfo/perl/libs/Regexp/Group.pm
ViewVC logotype

Annotation of /nfo/perl/libs/Regexp/Group.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (hide annotations)
Tue May 13 09:10:15 2003 UTC (21 years ago) by joko
Branch: MAIN
CVS Tags: HEAD
Changes since 1.4: +163 -73 lines
added documentation (pod) and comments
now using Class::Smart as base class
new: reporting and statistics

1 joko 1.1 ## ------------------------------------------------------------------------
2 joko 1.5 ## $Id: Group.pm,v 1.4 2003/03/27 15:45:00 joko Exp $
3 joko 1.1 ## ------------------------------------------------------------------------
4 joko 1.2 ## $Log: Group.pm,v $
5 joko 1.5 ## Revision 1.4 2003/03/27 15:45:00 joko
6     ## fixes to modules regarding new namespace(s) below Data::Mungle::*
7     ##
8 joko 1.4 ## Revision 1.3 2003/02/20 21:11:43 joko
9     ## renamed module
10     ##
11 joko 1.3 ## Revision 1.2 2003/02/14 14:17:42 joko
12     ## + more warnings and verbosing
13     ##
14 joko 1.2 ## Revision 1.1 2002/12/22 14:16:39 joko
15     ## + initial check-in
16     ##
17 joko 1.1 ## ------------------------------------------------------------------------
18    
19    
20 joko 1.5 =pod
21    
22     =head1 Name
23    
24     Regexp::Group
25    
26    
27     =head1 Description
28    
29     Perform a group of forward declared steps on a string
30     using regular expressions and callback event handlers.
31    
32     A "step" is a triple of a pattern, an event handler and associated metadata.
33    
34     The regular expression pattern is used to scan the payload and a corresponding
35     event handler is used to mungle the raw result of the pattern match into a more
36     self-descriptive form. The metadata information passed along gets used to do
37     right that.
38    
39     The declaration happens - package/block/topic - based.
40     The elements are linked to each other across blocks by giving them the same name.
41    
42     The block / declaration data structure are simple hashes.
43    
44     The declaration of these has to happen outside of this module, the intention of this
45     module can be pictured as the "(state) engine" which processes this "receipt".
46    
47     I don't actually remember, but *returning groups* of results (grouped records)
48     also might have been an initial aspect/intention of this module. However - there
49     has been no investigation if it actually worked out as expected.
50    
51     It works like it is. Never touch a running system. ;-)
52    
53    
54     =head1 Todo
55    
56     o Kind of runtime introspection to give encapsulated access some innards:
57    
58     1. payload
59     $self->{data}
60    
61     2. receipt
62     $self->{metadata}
63     $self->{patterns}
64     $self->{coderefs}
65    
66    
67     =head1 Bugs
68    
69     Yes.
70    
71    
72     =cut
73    
74    
75 joko 1.1 package Regexp::Group;
76    
77     use strict;
78     use warnings;
79    
80 joko 1.5 use base qw( Class::Smart DesignPattern::Logger );
81     $Class::Smart::constructor = '_init';
82 joko 1.1
83    
84     use Data::Dumper;
85 joko 1.4 use Data::Mungle::Compare::Struct qw( isEmpty );
86 joko 1.1
87 joko 1.5 # TODO/REVIEW: compile patterns? (see below...)
88     #my @patterns;
89 joko 1.1
90    
91 joko 1.5 # Initializer / Pseudo constructor - Does some work instead of an otherwise required constructor.
92 joko 1.1 sub _init {
93     my $self = shift;
94    
95 joko 1.5 # Just initialize once, prevent multiple redundant (false|zombie) calls to this method.
96     return if $self->{__INITIALIZED};
97     $self->{__INITIALIZED}++;
98    
99     $self->{caller} = caller;
100    
101 joko 1.1 # TODO/REVIEW: compile patterns?
102     #@patterns = map{ qr/$_/ } @patterns;
103    
104     # dereference main/raw data (payload)
105     $self->{data} = ${$self->{data}};
106     }
107    
108 joko 1.5
109     # Kicks off processing and reports statistics from subsequential steps.
110 joko 1.1 sub scan {
111     my $self = shift;
112     my $stepkeys = shift;
113     my $coderef = shift;
114    
115 joko 1.5 $self->clear_report();
116    
117 joko 1.1 #my $nodecount_default = shift;
118     my $nodecount_default = 19;
119    
120 joko 1.5 #print "SCAN", "\n";
121     #print $self->{pattern}, "\n";
122 joko 1.1 $self->{result} = [];
123     $self->{result_raw} = [];
124    
125     # configure tracing
126     # respects additional trace-options passed to _trace-method
127     $self->{TRACE_OPTIONS} = 1;
128     # disables _any_ tracing - even if sub-conditions evaluate to true values
129     $self->{TRACE_DISABLED} = 0;
130    
131 joko 1.5 $self->log('--------- start ---');
132 joko 1.1
133     #for (1..20) {
134    
135     my $patterncount = 0;
136     my $match_in_loop;
137    
138     my $steps;
139     if ($stepkeys) {
140     $steps = $stepkeys;
141     $patterncount++;
142     } else {
143     $steps = $self->{metadata}->{steps};
144     }
145    
146     #print "cb-2: $coderef", "\n";
147    
148     foreach my $stepkey_current (@{$steps}) {
149 joko 1.5 $self->{report}->{$stepkey_current} = $self->_scan_step($stepkey_current, $coderef);
150 joko 1.1 $patterncount++;
151     }
152    
153     #}
154    
155     # ... propagate them to processing
156    
157     # assign result to property of object's current instance
158     #push @{$self->{result}}, $result;
159    
160     #$self->{code}->($self->{caller}, $self);
161    
162 joko 1.5 # new of 2003-05-09: return reference to report as response
163     #return $self->{report};
164    
165 joko 1.1 }
166    
167     sub _scan_step {
168    
169     my $self = shift;
170     my $stepkey = shift;
171     my $callback = shift;
172    
173 joko 1.5 $self->log("Running step '$stepkey'.");
174 joko 1.1
175 joko 1.5 # All data we need from the current instance to drive the pattern match via "gex".
176     my $dataref = $self->{data};
177     my $metadata = $self->{metadata}->{$stepkey};
178     my $pattern = $self->{patterns}->{$stepkey};
179     my $coderef = $self->{coderefs}->{$stepkey};
180    
181     # Doing some assertions which lead to structure tracing on DEBUG-OUT ...
182 joko 1.1
183 joko 1.5 # debug
184     #print "pattern: $pattern", "\n";
185 joko 1.1 #exit;
186 joko 1.5
187     # Having all these assertions set/evaluate to true, it will dump an overview of what happens in the innards on STDOUT.
188     my $assertion = ($stepkey eq 'bet');
189     $assertion = 0;
190     $self->trace("data used", $dataref, $assertion, undef, { tag => '', exit => 0 });
191     $self->trace("pattern used", $pattern, $assertion, undef, { tag => '', exit => 0 });
192 joko 1.1 $self->trace("metadata used", $metadata, 0, undef, { tag => '', exit => 0 });
193     $self->trace("coderef used", $coderef, 0, undef, { tag => '', exit => 0 });
194    
195 joko 1.5 # If metadata variable "noscan" is supplied, don't kick off the regex-engine, just call the coderef.
196     # TODO: What was the intention of this? How are its mechanics working? Describe this here!
197     # FIXME: Log this event.
198     if ($metadata->{noscan} && $metadata->{noscan} == 1) {
199     $coderef->($self, undef, undef, $callback);
200     return;
201     }
202    
203     # dereference data if it's still referenced
204     my $data = ${$self->{data}};
205 joko 1.1
206 joko 1.5 if (!$data) {
207     $self->log("data is empty: stepkey=$stepkey", 'warning');
208     return;
209     }
210 joko 1.1
211 joko 1.5
212     # To detect if anything happened inside the regex after it: Could the pattern be applied?.
213     my $matchcount = 0;
214 joko 1.2
215 joko 1.5 # Apply the regular expression using "gex"-options for the "Perl regular expressions"
216     # parser that's embedded in Perl-5.6.1, see "perldoc perlre".
217    
218     # This is preliminary:
219     # This code initially was written in Dec-2002 with Perl-5.6.1 on Windows using
220     # [perl, v5.6.1 built for MSWin32-x86-multi-thread], has been tested with
221     # Perl-5.8.x on Linux and seems to work fine in production on a FreeBSD machine
222     # running Perl-5.?.? since mid 2003.
223    
224     #$self->{data} =~ s{
225     $data =~ s{
226 joko 1.1
227     $pattern # the pattern itself - as contained in a string - actually gets interpolated here
228    
229 joko 1.5 }{
230 joko 1.1
231 joko 1.5 # Indicate for the followup code that the pattern matched.
232     $matchcount++;
233    
234 joko 1.1 print "." if $self->{verbose};
235    
236 joko 1.5 #$self->log("Match in step '$stepkey'!");
237 joko 1.1
238     # have a clean item to fill in slots
239     my $result_item = [];
240    
241     # 1. build matched structure from slots - grouping functionality for slots
242    
243     # V1 - hand-rolled
244     #my $result = [$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20];
245     #print Dumper($result);
246    
247     my $count = $metadata->{nodecount};
248    
249     # TODO: re-renable this!
250     #$count ||= $nodecount_default;
251    
252     # V2 - iterate!
253     # push dummy item to slot '0' to mimic known behaviour of accessing $1, $2, $3, ....
254     push @{$result_item}, 'n/a';
255     for my $i (1..$count) {
256     my $slot = '$' . $i;
257     push @{$result_item}, eval($slot);
258     }
259    
260    
261     # 2. remember built result (raw) in object - grouping functionality for items
262 joko 1.5 #$self->log("pushing raw result");
263 joko 1.1 push @{$self->{result_raw}}, $result_item;
264    
265    
266     # 4. check/modify further metadata
267     # e.g. 'column_positions' means: "this step is a ColumnProvider"
268     #print Dumper($metadata);
269     if (my $positions = $metadata->{column_positions}) {
270     $self->trace('positions', $positions, 0);
271    
272     #print Dumper($result_item);
273    
274     # build column-names array from position-metadata
275     my @column_names;
276     map {
277     push @column_names, $result_item->[$_];
278     } @$positions;
279    
280     # store additional metadata
281     $metadata->{column_names} = \@column_names;
282     }
283    
284     $self->trace('column-names', $metadata->{column_names}, 0);
285    
286    
287     #return '';
288     #$&;
289     push(@{$self->{matches}->{parsed_chunks}}, $&);
290     #undef;
291    
292     my $matchpos = pos($data);
293     $self->{matches}->{position} = $matchpos;
294     push @{$self->{matches}->{positions}}, $matchpos;
295    
296    
297     # 3. call coderef to get _processed_ result if given (callback)
298 joko 1.5 $self->log("Match!!! Calling event handler '$coderef'.");
299 joko 1.1 if ($coderef) {
300     my $result_processed = {};
301     if ($coderef->($self, $result_item, $result_processed, $callback)) {
302     $self->trace("pushing processed result", $result_processed, 0);
303     push @{$self->{result}}, $result_processed if $result_processed;
304     } else {
305     $self->log('processing of container failed', 'warning');
306     return;
307     }
308     } else {
309     # TODO: croak 'no coderef';
310     }
311    
312 joko 1.5 # Pass back matched content if we are not the ones to delete something (pass-back-if-not-remover)
313     # With this mechanism, we can iterate over the same content again and again and don't loose data while doing this.
314     # Without this, multiple scan stepping would not be possible since each step would either remove its
315 joko 1.1 # matched content - or not: no flexibility.
316 joko 1.5 # As it seems we need flexibility at any point ;-( in order not to break our necks with code-maintenance ;-)
317     # this also can get configured here (this flag is contained in the metadata-part of your filter-declaration).
318 joko 1.1 $& if !$metadata->{remover};
319    
320 joko 1.5 }gex;
321 joko 1.1
322 joko 1.5 # What to do with the payload?
323     #$self->{data_rest} = \$data;
324     $self->{data} = \$data;
325 joko 1.1
326 joko 1.5 #$self->trace('matches', $self->getMatches(), 1);
327     #$self->trace('matches-positions', $self->getMatchPositions(), 1);
328 joko 1.2
329 joko 1.5 # Try to fake some kinda progress bar on STDOUT.
330     #print "\r" if $self->{verbose};
331     print STDOUT " " x 50 if $self->{verbose};
332     print STDOUT "\r" if $self->{verbose};
333 joko 1.1
334 joko 1.5 # new of 2003-05-08: Now we have a return value we can also send to debug output handler.
335     $self->log("No Match for step '$stepkey'!", 'debug') if not defined $matchcount or $matchcount == 0;
336    
337     # Indicate if regex for current step could be applied.
338     return $matchcount;
339 joko 1.1 }
340    
341     sub continue {
342     my $self = shift;
343     my $scankey = shift;
344     #print "CONTINUE!", "\n";
345     #$self->{data} = $self->{data_rest};
346     return $self->scan($scankey);
347     }
348    
349     sub getMatches {
350     my $self = shift;
351     return $self->{result_raw};
352     }
353    
354     sub getResults {
355     my $self = shift;
356     return $self->{result};
357     }
358    
359     sub getDataRest {
360     my $self = shift;
361     return $self->{data_rest};
362     }
363    
364     sub getMatchCount {
365     my $self = shift;
366    
367     my $matches = $self->getMatches();
368     return if isEmpty($matches);
369    
370     my $count = $#{$matches};
371     $count++;
372     return $count;
373     }
374    
375     sub getMatchPositions {
376     my $self = shift;
377     if (!$self->getMatchCount()) {
378     $self->{matches}->{positions} = [];
379     }
380     return $self->{matches}->{positions};
381     }
382    
383     sub getMatchPosition {
384     my $self = shift;
385     return $self->{matches}->{position};
386     }
387    
388 joko 1.5 sub get_report {
389     my $self = shift;
390     return $self->{report};
391     }
392    
393     sub clear_report {
394     my $self = shift;
395     delete $self->{report};
396     }
397    
398 joko 1.1 1;
399 joko 1.5 __END__

MailToCvsAdmin">MailToCvsAdmin
ViewVC Help
Powered by ViewVC 1.1.26 RSS 2.0 feed