Hi you two, here is some nice raw spaghetti code to filter your tags. Save it as TagFilter.h in your browser’s profile folder and adjust as preferred:
use HTML::TagFilter;
# Simplest use: construct a filter with no options and clean one string.
# The constructor is called as a class method (Class->new) rather than with
# the ambiguous indirect-object form "new Class".
my $tf = HTML::TagFilter->new;
my $clean_html = $tf->filter($dirty_html);
# or
# Alternative construction: every setting is handed to new() as a named option.
# The {...} and [...] values are placeholders to be replaced with real data
# before this snippet is used.
my $tf = HTML::TagFilter->new(
allow=>{...},
deny=>{...},
log_rejects => 1,
strip_comments => 1,
echo => 1,
verbose => 1,
skip_xss_protection => 1,
# NOTE(review): a later example in this file spells this option
# skip_ltgt_entification -- confirm which name the module accepts.
skip_entification => 1,
skip_mailto_entification => 1,
xss_risky_attributes => [...],
xss_permitted_protocols => [...],
xss_allow_local_links => 1,
);
# or
# Construct a filter with an on_finish_document callback that returns the
# filter's report wrapped in a paragraph element.
my $tf = HTML::TagFilter->new(
    on_finish_document => sub {
        # The filter object arrives as the first callback argument (the other
        # on_finish_document example in this file unpacks it the same way);
        # without this line $self would be an undeclared variable.
        my ($self) = @_;
        return "\n<p>" . $self->report . "</p>\n";
    },
);
# Incremental use: hand the filter several pieces of HTML with parse(),
# then collect the results from the same object.
$tf->parse($some_html);
$tf->parse($more_html);
my $clean_html = $tf->output;
# report() is called once in scalar context and once in list context;
# presumably it yields a summary string vs. a list of removed tags --
# confirm against the module documentation.
my $cleaning_summary = $tf->report;
my @tags_removed = $tf->report;
my $error_log = $tf->error;
# Constructor call that sets only the simple option flags, each given the value 1.
my $tf = HTML::TagFilter->new(
log_rejects => 1,
strip_comments => 1,
echo => 1,
verbose => 1,
skip_xss_protection => 1,
# NOTE(review): an earlier example in this file spells this option
# skip_entification -- confirm which name the module accepts.
skip_ltgt_entification => 1,
skip_mailto_entification => 1,
);
# Adjust the rules of an existing filter object ($self) through its methods.
$self->xss_risky_attributes( qw(your list of attributes) );

# Every rule is passed as a nested hash reference of the form
# { tag => { key => [ ... ] } }. The inner hash must be closed before the
# outer one; the first two calls were missing that closing brace, and the
# first one also lacked a value for its 'any' key.
$self->allow_tags({ p => { any => [] } });
$self->allow_tags({ p => { none => [] } });
$self->allow_tags({ a => { any => [] } });
$self->deny_tags({ a => { onClick => [] } });
# Build a filter whose callbacks maintain a stack of currently open tag names
# in $self->{_tag_stack} and return closing-tag text where tags were left open.
my $filter = HTML::TagFilter->new(

    # Begin each document with an empty stack.
    on_start_document => sub {
        my ($self, $rawtext) = @_;
        $self->{_tag_stack} = [];
        return;
    },

    # Record every opened tag except the ones in the fixed list below,
    # which are never pushed onto the stack.
    on_open_tag => sub {
        my ($self, $tag, $attributes, $sequence) = @_;
        my $name = $$tag;
        return if grep { $name eq $_ } qw(img br hr meta link);
        push @{ $self->{_tag_stack} }, $name;
        return;
    },

    # For a closing tag: if no matching open tag is on the stack, blank the
    # tag name and return nothing. Otherwise pop the stack down to the
    # matching tag and return closing tags for everything popped on the way.
    on_close_tag => sub {
        my ($self, $tag) = @_;

        if ( !@{ $self->{_tag_stack} }
            || !grep { $$tag eq $_ } @{ $self->{_tag_stack} } )
        {
            undef ${ $tag };
            return;
        }

        my @pending;
        while (my $open = pop @{ $self->{_tag_stack} }) {
            if ($open eq $$tag) {
                return join '', map "</$_>", @pending;
            }
            push @pending, $open;
        }
    },

    # At the end of the document, return closing tags for whatever is still
    # on the stack, most recently opened first.
    on_finish_document => sub {
        my ($self, $cleantext) = @_;
        my @still_open = reverse @{ $self->{_tag_stack} };
        return join '', map "</$_>", @still_open;
    },
);
# Callback for an opening tag: renames <b> to <strong>.
#
# Arguments:
#   $self       - the object the callback is invoked on (unused here)
#   $tag        - reference to the tag name; modified in place
#   $attributes - unused here
#   $sequence   - unused here
#
# Returns nothing. Other callbacks in this file return text (for example
# closing tags) and use a bare return for "no text", so the return is made
# explicit here; otherwise the value of the assignment ('strong') would be
# the sub's implicit return value.
sub on_open_tag {
    my ($self, $tag, $attributes, $sequence) = @_;
    $$tag = 'strong' if $$tag eq 'b';
    return;
}
# Callback for a closing tag: renames </b> to </strong>.
#
# Arguments:
#   $self - the object the callback is invoked on (unused here)
#   $tag  - reference to the tag name; modified in place
#
# Returns nothing. Other callbacks in this file return text (for example
# closing tags) and use a bare return for "no text", so the return is made
# explicit here; otherwise the value of the assignment ('strong') would be
# the sub's implicit return value.
sub on_close_tag {
    my ($self, $tag) = @_;
    $$tag = 'strong' if $$tag eq 'b';
    return;
}
# Run every page stored in %pages through the filter and print the result.
for my $page_key (keys %pages) {
    print $tf->filter( $pages{$page_key} );
}
polonus