# $Id: DTDDecls.pm,v 1.4 2002/01/29 21:00:26 matt Exp $ package XML::SAX::PurePerl; use strict; use XML::SAX::PurePerl::Productions qw($NameChar $SingleChar); sub elementdecl { my ($self, $reader) = @_; if ($reader->match_string('skip_whitespace($reader) || $self->parser_error("No whitespace after ELEMENT declaration", $reader); my $name = $self->Name($reader); $self->skip_whitespace($reader) || $self->parser_error("No whitespace after ELEMENT's name", $reader); $self->contentspec($reader, $name); $self->skip_whitespace($reader); $reader->match('>') || $self->parser_error("Closing angle bracket not found on ELEMENT declaration", $reader); return 1; } return 0; } sub contentspec { my ($self, $reader, $name) = @_; my $model; if ($reader->match_string('EMPTY')) { $model = 'EMPTY'; } elsif ($reader->match_string('ANY')) { $model = 'ANY'; } else { $model = $self->Mixed_or_children($reader); } if ($model) { # call SAX callback now. $self->element_decl({Name => $name, Model => $model}); return 1; } $self->parser_error("contentspec not found in ELEMENT declaration", $reader); } sub Mixed_or_children { my ($self, $reader) = @_; my $model; if ($reader->match('(')) { $model = '('; $self->skip_whitespace($reader); if ($reader->match_string('#PCDATA')) { return $self->Mixed($reader); } # not matched - must be Children $reader->buffer('('); return $self->children($reader); } return; } # Mixed ::= ( '(' S* PCDATA ( S* '|' S* QName )* S* ')' '*' ) # | ( '(' S* PCDATA S* ')' ) sub Mixed { my ($self, $reader) = @_; # Mixed_or_children already matched '(' S* '#PCDATA' my $model = '(#PCDATA'; $self->skip_whitespace($reader); my %seen; while ($reader->match('|')) { $self->skip_whitespace($reader); my $name = $self->Name($reader) || $self->parser_error("No 'Name' after Mixed content '|'", $reader); if ($seen{$name}) { $self->parser_error("Element '$name' has already appeared in this group", $reader); } $seen{$name}++; $model .= "|$name"; $self->skip_whitespace($reader); } $reader->match(')') || $self->parser_error("no closing bracket on mixed content", $reader); $model .= ")"; if ($reader->match('*')) { $model .= "*"; } return $model; } # [[47]] Children ::= ChoiceOrSeq Cardinality? # [[48]] Cp ::= ( QName | ChoiceOrSeq ) Cardinality? # ChoiceOrSeq ::= '(' S* Cp ( Choice | Seq )? S* ')' # [[49]] Choice ::= ( S* '|' S* Cp )+ # [[50]] Seq ::= ( S* ',' S* Cp )+ # // Children ::= (Choice | Seq) Cardinality? # // Cp ::= ( QName | Choice | Seq) Cardinality? # // Choice ::= '(' S* Cp ( S* '|' S* Cp )+ S* ')' # // Seq ::= '(' S* Cp ( S* ',' S* Cp )* S* ')' # [[51]] Mixed ::= ( '(' S* PCDATA ( S* '|' S* QName )* S* ')' MixedCardinality ) # | ( '(' S* PCDATA S* ')' ) # Cardinality ::= '?' | '+' | '*' # MixedCardinality ::= '*' sub children { my ($self, $reader) = @_; return $self->ChoiceOrSeq($reader) . $self->Cardinality($reader); } sub ChoiceOrSeq { my ($self, $reader) = @_; $reader->match('(') || $self->parser_error("choice/seq contains no opening bracket", $reader); my $model = '('; $self->skip_whitespace($reader); $model .= $self->Cp($reader); if (my $choice = $self->Choice($reader)) { $model .= $choice; } else { $model .= $self->Seq($reader); } $self->skip_whitespace($reader); $reader->match(')') || $self->parser_error("choice/seq contains no closing bracket", $reader); $model .= ')'; return $model; } sub Cardinality { my ($self, $reader) = @_; # cardinality is always optional if ($reader->match('?')) { return '?'; } if ($reader->match('+')) { return '+'; } if ($reader->match('*')) { return '*'; } return ''; } sub Cp { my ($self, $reader) = @_; my $model; if (my $name = $self->Name($reader)) { return $name . $self->Cardinality($reader); } return $self->ChoiceOrSeq($reader) . $self->Cardinality($reader); } sub Choice { my ($self, $reader) = @_; my $model = ''; $self->skip_whitespace($reader); while ($reader->match('|')) { $self->skip_whitespace($reader); $model .= '|'; $model .= $self->Cp($reader); $self->skip_whitespace($reader); } return $model; } sub Seq { my ($self, $reader) = @_; my $model = ''; $self->skip_whitespace($reader); while ($reader->match(',')) { $self->skip_whitespace($reader); $model .= ','; $model .= $self->Cp($reader); $self->skip_whitespace($reader); } return $model; } sub AttlistDecl { my ($self, $reader) = @_; if ($reader->match_string('skip_whitespace($reader) || $self->parser_error("No whitespace after ATTLIST declaration", $reader); my $name = $self->Name($reader); $self->AttDefList($reader, $name); $self->skip_whitespace($reader); $reader->match('>') || $self->parser_error("Closing angle bracket not found on ATTLIST declaration", $reader); return 1; } return 0; } sub AttDefList { my ($self, $reader, $name) = @_; 1 while $self->AttDef($reader, $name); } sub AttDef { my ($self, $reader, $el_name) = @_; $self->skip_whitespace($reader) || return 0; my $att_name = $self->Name($reader) || return 0; $self->skip_whitespace($reader) || $self->parser_error("No whitespace after Name in attribute definition", $reader); my $att_type = $self->AttType($reader); $self->skip_whitespace($reader) || $self->parser_error("No whitespace after AttType in attribute definition", $reader); my ($default, $value) = $self->DefaultDecl($reader); # fire SAX event here! $self->attribute_decl({ eName => $el_name, aName => $att_name, Type => $att_type, ValueDefault => $default, Value => $value, }); return 1; } sub AttType { my ($self, $reader) = @_; return $self->StringType($reader) || $self->TokenizedType($reader) || $self->EnumeratedType($reader) || $self->parser_error("Can't match AttType", $reader); } sub StringType { my ($self, $reader) = @_; if ($reader->match_string('CDATA')) { return 'CDATA'; } return; } sub TokenizedType { my ($self, $reader) = @_; if ($reader->match_string('IDREFS')) { return 'IDREFS'; } if ($reader->match_string('IDREF')) { return 'IDREF'; } if ($reader->match_string('ID')) { return 'ID'; } if ($reader->match_string('ENTITIES')) { return 'ENTITIES'; } if ($reader->match_string('ENTITY')) { return 'ENTITY'; } if ($reader->match_string('NMTOKENS')) { return 'NMTOKENS'; } if ($reader->match_string('NMTOKEN')) { return 'NMTOKEN'; } return; } sub EnumeratedType { my ($self, $reader) = @_; return $self->NotationType($reader) || $self->Enumeration($reader); } sub NotationType { my ($self, $reader) = @_; if ($reader->match_string('NOTATION')) { $self->skip_whitespace($reader) || $self->parser_error("No whitespace after NOTATION", $reader); $reader->match('(') || $self->parser_error("No opening bracket in notation section", $reader); $self->skip_whitespace($reader); my $model = 'NOTATION ('; my $name = $self->Name($reader) || $self->parser_error("No name in notation section", $reader); $model .= $name; $self->skip_whitespace($reader); while ($reader->match('|')) { $model .= '|'; $self->skip_whitespace($reader); my $name = $self->Name($reader) || $self->parser_error("No name in notation section", $reader); $model .= $name; $self->skip_whitespace($reader); } $reader->match(')') || $self->parser_error("No closing bracket in notation section", $reader); $model .= ')'; return $model; } return; } sub Enumeration { my ($self, $reader) = @_; if ($reader->match('(')) { $self->skip_whitespace($reader); my $model = '('; my $nmtoken = $self->Nmtoken($reader) || $self->parser_error("No Nmtoken in enumerated declaration", $reader); $model .= $nmtoken; $self->skip_whitespace($reader); while ($reader->match('|')) { $model .= '|'; $self->skip_whitespace($reader); my $nmtoken = $self->Nmtoken($reader) || $self->parser_error("No Nmtoken in enumerated declaration", $reader); $model .= $nmtoken; $self->skip_whitespace($reader); } $reader->match(')') || $self->parser_error("No closing bracket in enumerated declaration", $reader); $model .= ')'; return $model; } return; } sub Nmtoken { my ($self, $reader) = @_; $reader->consume($NameChar); return $reader->consumed; } sub DefaultDecl { my ($self, $reader) = @_; if ($reader->match_string('#REQUIRED')) { return '#REQUIRED'; } if ($reader->match_string('#IMPLIED')) { return '#IMPLIED'; } my $model = ''; if ($reader->match_string('#FIXED')) { $self->skip_whitespace($reader) || $self->parser_error( "no whitespace after FIXED specifier", $reader); my $value = $self->AttValue($reader); return "#FIXED", $value; } my $value = $self->AttValue($reader); return undef, $value; } sub EntityDecl { my ($self, $reader) = @_; if ($reader->match_string('skip_whitespace($reader) || $self->parser_error( "No whitespace after ENTITY declaration", $reader); $self->PEDecl($reader) || $self->GEDecl($reader); $self->skip_whitespace($reader); $reader->match('>') || $self->parser_error("No closing '>' in entity definition", $reader); return 1; } return 0; } sub GEDecl { my ($self, $reader) = @_; my $name = $self->Name($reader) || $self->parser_error("No entity name given", $reader); $self->skip_whitespace($reader) || $self->parser_error("No whitespace after entity name", $reader); # TODO: ExternalID calls lexhandler method. Wrong place for it. my $value; if ($value = $self->ExternalID($reader)) { $value .= $self->NDataDecl($reader); } else { $value = $self->EntityValue($reader); } if ($self->{ParseOptions}{entities}{$name}) { warn("entity $name already exists\n"); } else { $self->{ParseOptions}{entities}{$name} = 1; $self->{ParseOptions}{expanded_entity}{$name} = $value; # ??? } # do callback? return 1; } sub PEDecl { my ($self, $reader) = @_; $reader->match('%') || return 0; $self->skip_whitespace($reader) || $self->parser_error("No whitespace after parameter entity marker", $reader); my $name = $self->Name($reader) || $self->parser_error("No parameter entity name given", $reader); $self->skip_whitespace($reader) || $self->parser_error("No whitespace after parameter entity name", $reader); my $value = $self->ExternalID($reader) || $self->EntityValue($reader) || $self->parser_error("PE is not a value or an external resource", $reader); # do callback? return 1; } my $quotre = qr/[^%&\"]/; my $aposre = qr/[^%&\']/; sub EntityValue { my ($self, $reader) = @_; my $quote = '"'; my $re = $quotre; if (!$reader->match($quote)) { $quote = "'"; $re = $aposre; $reader->match($quote) || $self->parser_error("Not a quote character", $reader); } my $value = ''; while (1) { if ($reader->consume($re)) { $value .= $reader->consumed; } elsif ($reader->match('&')) { # if it's a char ref, expand now: if ($reader->match('#')) { my $char; my $ref; if ($reader->match('x')) { $reader->consume(qr/[0-9a-fA-F]/) || $self->parser_error("Hex character reference contains illegal characters", $reader); $ref = $reader->consumed; $char = chr_ref(hex($ref)); $ref = "x$ref"; } else { $reader->consume(qr/[0-9]/) || $self->parser_error("Decimal character reference contains illegal characters", $reader); $ref = $reader->consumed; $char = chr($ref); } $reader->match(';') || $self->parser_error("No semi-colon found after character reference", $reader); if ($char !~ $SingleChar) { # match a single character $self->parser_error("Character reference '&#$ref;' refers to an illegal XML character ($char)", $reader); } $value .= $char; } else { # entity refs in entities get expanded later, so don't parse now. $value .= '&'; } } elsif ($reader->match('%')) { $value .= $self->PEReference($reader); } elsif ($reader->match($quote)) { # end of attrib last; } else { $self->parser_error("Invalid character in attribute value", $reader); } } return $value; } sub NDataDecl { my ($self, $reader) = @_; $self->skip_whitespace($reader) || return ''; $reader->match_string("NDATA") || return ''; $self->skip_whitespace($reader) || $self->parser_error("No whitespace after NDATA declaration", $reader); my $name = $self->Name($reader) || $self->parser_error("NDATA declaration lacks a proper Name", $reader); return " NDATA $name"; } sub NotationDecl { my ($self, $reader) = @_; if ($reader->match_string('skip_whitespace($reader) || $self->parser_error("No whitespace after NOTATION declaration", $reader); $reader->consume(qr/[^>]/); # FIXME $reader->match('>'); # FIXME $self->notation_decl({Name => "FIXME", SystemId => "FIXME", PublicId => "FIXME" }); return 1; } return 0; } 1;