/[cvs]/nfo/perl/libs/Regexp/Group.pm
ViewVC logotype

Contents of /nfo/perl/libs/Regexp/Group.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (show annotations)
Tue May 13 09:10:15 2003 UTC (20 years, 11 months ago) by joko
Branch: MAIN
CVS Tags: HEAD
Changes since 1.4: +163 -73 lines
added documentation (pod) and comments
now using Class::Smart as base class
new: reporting and statistics

1 ## ------------------------------------------------------------------------
2 ## $Id: Group.pm,v 1.4 2003/03/27 15:45:00 joko Exp $
3 ## ------------------------------------------------------------------------
4 ## $Log: Group.pm,v $
5 ## Revision 1.4 2003/03/27 15:45:00 joko
6 ## fixes to modules regarding new namespace(s) below Data::Mungle::*
7 ##
8 ## Revision 1.3 2003/02/20 21:11:43 joko
9 ## renamed module
10 ##
11 ## Revision 1.2 2003/02/14 14:17:42 joko
12 ## + more warnings and verbosing
13 ##
14 ## Revision 1.1 2002/12/22 14:16:39 joko
15 ## + initial check-in
16 ##
17 ## ------------------------------------------------------------------------
18
19
20 =pod
21
22 =head1 Name
23
24 Regexp::Group
25
26
27 =head1 Description
28
29 Perform a group of forward declared steps on a string
30 using regular expressions and callback event handlers.
31
32 A "step" is a triple of a pattern, an event handler and associated metadata.
33
34 The regular expression pattern is used to scan the payload and a corresponding
35 event handler is used to mungle the raw result of the pattern match into a more
36 self-descriptive form. The metadata information passed along gets used to do
37 right that.
38
39 The declaration happens - package/block/topic - based.
40 The elements are linked to each other across blocks by giving them the same name.
41
42 The block / declaration data structure are simple hashes.
43
44 The declaration of these has to happen outside of this module, the intention of this
45 module can be pictured as the "(state) engine" which processes this "receipt".
46
47 I don't actually remember, but *returning groups* of results (grouped records)
48 also might have been an initial aspect/intention of this module. However - there
49 has been no investigation if it actually worked out as expected.
50
51 It works like it is. Never touch a running system. ;-)
52
53
54 =head1 Todo
55
56 o Kind of runtime introspection to give encapsulated access some innards:
57
58 1. payload
59 $self->{data}
60
61 2. receipt
62 $self->{metadata}
63 $self->{patterns}
64 $self->{coderefs}
65
66
67 =head1 Bugs
68
69 Yes.
70
71
72 =cut
73
74
75 package Regexp::Group;
76
77 use strict;
78 use warnings;
79
80 use base qw( Class::Smart DesignPattern::Logger );
81 $Class::Smart::constructor = '_init';
82
83
84 use Data::Dumper;
85 use Data::Mungle::Compare::Struct qw( isEmpty );
86
87 # TODO/REVIEW: compile patterns? (see below...)
88 #my @patterns;
89
90
91 # Initializer / Pseudo constructor - Does some work instead of an otherwise required constructor.
92 sub _init {
93 my $self = shift;
94
95 # Just initialize once, prevent multiple redundant (false|zombie) calls to this method.
96 return if $self->{__INITIALIZED};
97 $self->{__INITIALIZED}++;
98
99 $self->{caller} = caller;
100
101 # TODO/REVIEW: compile patterns?
102 #@patterns = map{ qr/$_/ } @patterns;
103
104 # dereference main/raw data (payload)
105 $self->{data} = ${$self->{data}};
106 }
107
108
109 # Kicks off processing and reports statistics from subsequential steps.
110 sub scan {
111 my $self = shift;
112 my $stepkeys = shift;
113 my $coderef = shift;
114
115 $self->clear_report();
116
117 #my $nodecount_default = shift;
118 my $nodecount_default = 19;
119
120 #print "SCAN", "\n";
121 #print $self->{pattern}, "\n";
122 $self->{result} = [];
123 $self->{result_raw} = [];
124
125 # configure tracing
126 # respects additional trace-options passed to _trace-method
127 $self->{TRACE_OPTIONS} = 1;
128 # disables _any_ tracing - even if sub-conditions evaluate to true values
129 $self->{TRACE_DISABLED} = 0;
130
131 $self->log('--------- start ---');
132
133 #for (1..20) {
134
135 my $patterncount = 0;
136 my $match_in_loop;
137
138 my $steps;
139 if ($stepkeys) {
140 $steps = $stepkeys;
141 $patterncount++;
142 } else {
143 $steps = $self->{metadata}->{steps};
144 }
145
146 #print "cb-2: $coderef", "\n";
147
148 foreach my $stepkey_current (@{$steps}) {
149 $self->{report}->{$stepkey_current} = $self->_scan_step($stepkey_current, $coderef);
150 $patterncount++;
151 }
152
153 #}
154
155 # ... propagate them to processing
156
157 # assign result to property of object's current instance
158 #push @{$self->{result}}, $result;
159
160 #$self->{code}->($self->{caller}, $self);
161
162 # new of 2003-05-09: return reference to report as response
163 #return $self->{report};
164
165 }
166
167 sub _scan_step {
168
169 my $self = shift;
170 my $stepkey = shift;
171 my $callback = shift;
172
173 $self->log("Running step '$stepkey'.");
174
175 # All data we need from the current instance to drive the pattern match via "gex".
176 my $dataref = $self->{data};
177 my $metadata = $self->{metadata}->{$stepkey};
178 my $pattern = $self->{patterns}->{$stepkey};
179 my $coderef = $self->{coderefs}->{$stepkey};
180
181 # Doing some assertions which lead to structure tracing on DEBUG-OUT ...
182
183 # debug
184 #print "pattern: $pattern", "\n";
185 #exit;
186
187 # Having all these assertions set/evaluate to true, it will dump an overview of what happens in the innards on STDOUT.
188 my $assertion = ($stepkey eq 'bet');
189 $assertion = 0;
190 $self->trace("data used", $dataref, $assertion, undef, { tag => '', exit => 0 });
191 $self->trace("pattern used", $pattern, $assertion, undef, { tag => '', exit => 0 });
192 $self->trace("metadata used", $metadata, 0, undef, { tag => '', exit => 0 });
193 $self->trace("coderef used", $coderef, 0, undef, { tag => '', exit => 0 });
194
195 # If metadata variable "noscan" is supplied, don't kick off the regex-engine, just call the coderef.
196 # TODO: What was the intention of this? How are its mechanics working? Describe this here!
197 # FIXME: Log this event.
198 if ($metadata->{noscan} && $metadata->{noscan} == 1) {
199 $coderef->($self, undef, undef, $callback);
200 return;
201 }
202
203 # dereference data if it's still referenced
204 my $data = ${$self->{data}};
205
206 if (!$data) {
207 $self->log("data is empty: stepkey=$stepkey", 'warning');
208 return;
209 }
210
211
212 # To detect if anything happened inside the regex after it: Could the pattern be applied?.
213 my $matchcount = 0;
214
215 # Apply the regular expression using "gex"-options for the "Perl regular expressions"
216 # parser that's embedded in Perl-5.6.1, see "perldoc perlre".
217
218 # This is preliminary:
219 # This code initially was written in Dec-2002 with Perl-5.6.1 on Windows using
220 # [perl, v5.6.1 built for MSWin32-x86-multi-thread], has been tested with
221 # Perl-5.8.x on Linux and seems to work fine in production on a FreeBSD machine
222 # running Perl-5.?.? since mid 2003.
223
224 #$self->{data} =~ s{
225 $data =~ s{
226
227 $pattern # the pattern itself - as contained in a string - actually gets interpolated here
228
229 }{
230
231 # Indicate for the followup code that the pattern matched.
232 $matchcount++;
233
234 print "." if $self->{verbose};
235
236 #$self->log("Match in step '$stepkey'!");
237
238 # have a clean item to fill in slots
239 my $result_item = [];
240
241 # 1. build matched structure from slots - grouping functionality for slots
242
243 # V1 - hand-rolled
244 #my $result = [$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20];
245 #print Dumper($result);
246
247 my $count = $metadata->{nodecount};
248
249 # TODO: re-renable this!
250 #$count ||= $nodecount_default;
251
252 # V2 - iterate!
253 # push dummy item to slot '0' to mimic known behaviour of accessing $1, $2, $3, ....
254 push @{$result_item}, 'n/a';
255 for my $i (1..$count) {
256 my $slot = '$' . $i;
257 push @{$result_item}, eval($slot);
258 }
259
260
261 # 2. remember built result (raw) in object - grouping functionality for items
262 #$self->log("pushing raw result");
263 push @{$self->{result_raw}}, $result_item;
264
265
266 # 4. check/modify further metadata
267 # e.g. 'column_positions' means: "this step is a ColumnProvider"
268 #print Dumper($metadata);
269 if (my $positions = $metadata->{column_positions}) {
270 $self->trace('positions', $positions, 0);
271
272 #print Dumper($result_item);
273
274 # build column-names array from position-metadata
275 my @column_names;
276 map {
277 push @column_names, $result_item->[$_];
278 } @$positions;
279
280 # store additional metadata
281 $metadata->{column_names} = \@column_names;
282 }
283
284 $self->trace('column-names', $metadata->{column_names}, 0);
285
286
287 #return '';
288 #$&;
289 push(@{$self->{matches}->{parsed_chunks}}, $&);
290 #undef;
291
292 my $matchpos = pos($data);
293 $self->{matches}->{position} = $matchpos;
294 push @{$self->{matches}->{positions}}, $matchpos;
295
296
297 # 3. call coderef to get _processed_ result if given (callback)
298 $self->log("Match!!! Calling event handler '$coderef'.");
299 if ($coderef) {
300 my $result_processed = {};
301 if ($coderef->($self, $result_item, $result_processed, $callback)) {
302 $self->trace("pushing processed result", $result_processed, 0);
303 push @{$self->{result}}, $result_processed if $result_processed;
304 } else {
305 $self->log('processing of container failed', 'warning');
306 return;
307 }
308 } else {
309 # TODO: croak 'no coderef';
310 }
311
312 # Pass back matched content if we are not the ones to delete something (pass-back-if-not-remover)
313 # With this mechanism, we can iterate over the same content again and again and don't loose data while doing this.
314 # Without this, multiple scan stepping would not be possible since each step would either remove its
315 # matched content - or not: no flexibility.
316 # As it seems we need flexibility at any point ;-( in order not to break our necks with code-maintenance ;-)
317 # this also can get configured here (this flag is contained in the metadata-part of your filter-declaration).
318 $& if !$metadata->{remover};
319
320 }gex;
321
322 # What to do with the payload?
323 #$self->{data_rest} = \$data;
324 $self->{data} = \$data;
325
326 #$self->trace('matches', $self->getMatches(), 1);
327 #$self->trace('matches-positions', $self->getMatchPositions(), 1);
328
329 # Try to fake some kinda progress bar on STDOUT.
330 #print "\r" if $self->{verbose};
331 print STDOUT " " x 50 if $self->{verbose};
332 print STDOUT "\r" if $self->{verbose};
333
334 # new of 2003-05-08: Now we have a return value we can also send to debug output handler.
335 $self->log("No Match for step '$stepkey'!", 'debug') if not defined $matchcount or $matchcount == 0;
336
337 # Indicate if regex for current step could be applied.
338 return $matchcount;
339 }
340
341 sub continue {
342 my $self = shift;
343 my $scankey = shift;
344 #print "CONTINUE!", "\n";
345 #$self->{data} = $self->{data_rest};
346 return $self->scan($scankey);
347 }
348
349 sub getMatches {
350 my $self = shift;
351 return $self->{result_raw};
352 }
353
354 sub getResults {
355 my $self = shift;
356 return $self->{result};
357 }
358
359 sub getDataRest {
360 my $self = shift;
361 return $self->{data_rest};
362 }
363
364 sub getMatchCount {
365 my $self = shift;
366
367 my $matches = $self->getMatches();
368 return if isEmpty($matches);
369
370 my $count = $#{$matches};
371 $count++;
372 return $count;
373 }
374
375 sub getMatchPositions {
376 my $self = shift;
377 if (!$self->getMatchCount()) {
378 $self->{matches}->{positions} = [];
379 }
380 return $self->{matches}->{positions};
381 }
382
383 sub getMatchPosition {
384 my $self = shift;
385 return $self->{matches}->{position};
386 }
387
388 sub get_report {
389 my $self = shift;
390 return $self->{report};
391 }
392
393 sub clear_report {
394 my $self = shift;
395 delete $self->{report};
396 }
397
398 1;
399 __END__

MailToCvsAdmin">MailToCvsAdmin
ViewVC Help
Powered by ViewVC 1.1.26 RSS 2.0 feed