001/* DTD.java -- 002 Copyright (C) 2005 Free Software Foundation, Inc. 003 004This file is part of GNU Classpath. 005 006GNU Classpath is free software; you can redistribute it and/or modify 007it under the terms of the GNU General Public License as published by 008the Free Software Foundation; either version 2, or (at your option) 009any later version. 010 011GNU Classpath is distributed in the hope that it will be useful, but 012WITHOUT ANY WARRANTY; without even the implied warranty of 013MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014General Public License for more details. 015 016You should have received a copy of the GNU General Public License 017along with GNU Classpath; see the file COPYING. If not, write to the 018Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 01902110-1301 USA. 020 021Linking this library statically or dynamically with other modules is 022making a combined work based on this library. Thus, the terms and 023conditions of the GNU General Public License cover the whole 024combination. 025 026As a special exception, the copyright holders of this library give you 027permission to link this library with independent modules to produce an 028executable, regardless of the license terms of these independent 029modules, and to copy and distribute the resulting executable under 030terms of your choice, provided that you also meet, for each linked 031independent module, the terms and conditions of the license of that 032module. An independent module is a module which is not derived from 033or based on this library. If you modify this library, you may extend 034this exception to your version of the library, but you are not 035obligated to do so. If you do not wish to do so, delete this 036exception statement from your version. */ 037 038 039package javax.swing.text.html.parser; 040 041import java.io.DataInputStream; 042import java.io.EOFException; 043import java.io.IOException; 044import java.io.ObjectInputStream; 045import java.lang.reflect.Field; 046import java.lang.reflect.Modifier; 047import java.util.BitSet; 048import java.util.Hashtable; 049import java.util.StringTokenizer; 050import java.util.Vector; 051 052/** 053 * <p>Representation or the SGML DTD document. 054 * Provides basis for describing a syntax of the 055 * HTML documents. The fields of this class are NOT initialized in 056 * constructor. You need to do this separately before passing this data 057 * structure to the HTML parser. The subclasses with the fields, pre- 058 * initialized, for example, for HTML 4.01, can be available only between 059 * the implementation specific classes 060 * ( for example, {@link gnu.javax.swing.text.html.parser.HTML_401F } 061 * in this implementation).</p> 062 * <p> 063 * If you need more information about SGML DTD documents, 064 * the author suggests to read SGML tutorial on 065 * <a href="http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html" 066 * >http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html</a>. 067 * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>, 068 * Oxford University Press, 688 p, ISBN: 0198537379. 069 * </p> 070 * <p> 071 * Warning: the html, head and other tag fields will only be automatically 072 * assigned if the VM has the correctly implemented reflection mechanism. 073 * As these fields are not used anywhere in the implementation, not 074 * exception will be thrown in the opposite case. 075 * </p> 076 * 077 * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) 078 */ 079public class DTD 080 implements DTDConstants 081{ 082 /** 083 * The version of the persistent data format. 084 * @specnote This was made <code>final</code> in 1.5. 085 */ 086 public static final int FILE_VERSION = 1; 087 088 /** 089 * The table of existing available DTDs. 090 */ 091 static Hashtable<String,DTD> dtdHash = new Hashtable<String,DTD>(); 092 093 /** 094 * The applet element for this DTD. 095 */ 096 public Element applet; 097 098 /** 099 * The base element for this DTD. 100 */ 101 public Element base; 102 103 /** 104 * The body element for this DTD. 105 */ 106 public Element body; 107 108 /** 109 * The head element for this DTD. 110 */ 111 public Element head; 112 113 /** 114 * The html element for this DTD. 115 */ 116 public Element html; 117 118 /** 119 * The isindex element of for this DTD. 120 */ 121 public Element isindex; 122 123 /** 124 * The meta element for this DTD. 125 */ 126 public Element meta; 127 128 /** 129 * The p element for this DTD. 130 */ 131 public Element p; 132 133 /** 134 * The param element for this DTD. 135 */ 136 public Element param; 137 138 /** 139 * The pcdata for this DTD. 140 */ 141 public Element pcdata; 142 143 /** 144 * The title element for this DTD. 145 */ 146 public Element title; 147 148 /** 149 * The element for accessing all DTD elements by name. 150 */ 151 public Hashtable<String,Element> elementHash = 152 new Hashtable<String,Element>(); 153 154 /** 155 * The entity table for accessing all DTD entities by name. 156 */ 157 public Hashtable<Object, Entity> entityHash = new Hashtable<Object, Entity>(); 158 159 /** 160 * The name of this DTD. 161 */ 162 public String name; 163 164 /** 165 * Contains all elements in this DTD. The 166 * javax.swing.text.html.parser.Element#index field of all elements 167 * in this vector is set to the element position in this vector. 168 */ 169 public Vector<Element> elements = new Vector<Element>(); 170 171 /** Create a new DTD with the specified name. */ 172 protected DTD(String a_name) 173 { 174 name = a_name; 175 } 176 177 /** Get this DTD by name. The current implementation 178 * only looks in the internal table of DTD documents. If no corresponding 179 * entry is found, the new entry is created, placed into 180 * the table and returned. */ 181 public static DTD getDTD(String name) 182 throws IOException 183 { 184 DTD d = dtdHash.get(name); 185 186 if (d == null) 187 { 188 d = new DTD(name); 189 dtdHash.put(d.name, d); 190 } 191 192 return d; 193 } 194 195 /** 196 * Get the element by the element name. If the element is not yet 197 * defined, it is newly created and placed into the element table. 198 * If the element name matches (ingoring case) a public non static 199 * element field in this class, this field is assigned to the value 200 * of the newly created element. 201 */ 202 public Element getElement(String element_name) 203 { 204 return newElement(element_name); 205 } 206 207 /** 208 * Get the element by the value of its 209 * {@link javax.swing.text.html.parser.Element#index} field. 210 */ 211 public Element getElement(int index) 212 { 213 return elements.get(index); 214 } 215 216 /** 217 * Get the entity with the given identifier. 218 * @param id that can be returned by 219 * {@link javax.swing.text.html.parser.Entity#name2type(String an_entity)} 220 * @return The entity from this DTD or null if there is no entity with 221 * such id or such entity is not present in the table of this instance. 222 */ 223 public Entity getEntity(int id) 224 { 225 String name = Entity.mapper.get(id); 226 227 if (name != null) 228 return entityHash.get(name); 229 else 230 return null; 231 } 232 233 /** 234 * Get the named entity by its name. 235 */ 236 public Entity getEntity(String entity_name) 237 { 238 return entityHash.get(entity_name); 239 } 240 241 /** 242 * Get the name of this instance of DTD 243 */ 244 public String getName() 245 { 246 return name; 247 } 248 249 /** 250 * Creates, adds into the entity table and returns the 251 * character entity like <code>&lt;</code> 252 * (means '<code><</code>' ); 253 * @param name The entity name (without heading & and closing ;) 254 * @param type The entity type 255 * @param character The entity value (single character) 256 * @return The created entity 257 */ 258 public Entity defEntity(String name, int type, int character) 259 { 260 Entity e = newEntity(name, type); 261 e.data = new char[] { (char) character }; 262 return e; 263 } 264 265 /** 266 * Define the attributes for the element with the given name. 267 * If the element is not exist, it is created. 268 * @param forElement 269 * @param attributes 270 */ 271 public void defineAttributes(String forElement, AttributeList attributes) 272 { 273 Element e = elementHash.get(forElement.toLowerCase()); 274 275 if (e == null) 276 e = newElement(forElement); 277 278 e.atts = attributes; 279 } 280 281 /** 282 * Defines the element and adds it to the element table. Sets the 283 * <code>Element.index</code> field to the value, unique for this 284 * instance of DTD. If the element with the given name already exists, 285 * replaces all other its settings by the method argument values. 286 * @param name the name of the element 287 * @param type the type of the element 288 * @param headless true if the element needs no starting tag 289 * (should not occur in HTML). 290 * @param tailless true if the element needs no ending tag (like 291 * <code><hr></code> 292 * @param content the element content 293 * @param exclusions the set of elements that must not occur inside 294 * this element. The <code>Element.index</code> value defines which 295 * bit in this bitset corresponds to that element. 296 * @param inclusions the set of elements that can occur inside this 297 * element. the <code>Element.index</code> value defines which 298 * bit in this bitset corresponds to that element. 299 * @param attributes the element attributes. 300 * @return the newly defined element. 301 */ 302 public Element defineElement(String name, int type, boolean headless, 303 boolean tailless, ContentModel content, 304 BitSet exclusions, BitSet inclusions, 305 AttributeList attributes 306 ) 307 { 308 Element e = newElement(name); 309 e.type = type; 310 e.oStart = headless; 311 e.oEnd = tailless; 312 e.content = content; 313 e.exclusions = exclusions; 314 e.inclusions = inclusions; 315 e.atts = attributes; 316 317 return e; 318 } 319 320 /** 321 * Creates, intializes and adds to the entity table the new 322 * entity. 323 * @param name the name of the entity 324 * @param type the type of the entity 325 * @param data the data section of the entity 326 * @return the created entity 327 */ 328 public Entity defineEntity(String name, int type, char[] data) 329 { 330 Entity e = newEntity(name, type); 331 e.data = data; 332 333 return e; 334 } 335 336 /** Place this DTD into the DTD table. */ 337 public static void putDTDHash(String name, DTD dtd) 338 { 339 dtdHash.put(name, dtd); 340 } 341 342 /** 343 * <p>Reads DTD from an archived format. This format is not standardized 344 * and differs between implementations.</p><p> This implementation 345 * reads and defines all entities and elements using 346 * ObjectInputStream. The elements and entities can be written into the 347 * stream in any order. The objects other than elements and entities 348 * are ignored.</p> 349 * @param stream A data stream to read from. 350 * @throws java.io.IOException If one is thrown by the input stream 351 */ 352 public void read(DataInputStream stream) 353 throws java.io.IOException 354 { 355 ObjectInputStream oi = new ObjectInputStream(stream); 356 Object def; 357 try 358 { 359 while (true) 360 { 361 def = oi.readObject(); 362 if (def instanceof Element) 363 { 364 Element e = (Element) def; 365 elementHash.put(e.name.toLowerCase(), e); 366 assignField(e); 367 } 368 else if (def instanceof Entity) 369 { 370 Entity e = (Entity) def; 371 entityHash.put(e.name, e); 372 } 373 } 374 } 375 catch (ClassNotFoundException ex) 376 { 377 throw new IOException(ex.getMessage()); 378 } 379 catch (EOFException ex) 380 { 381 // ok EOF 382 } 383 } 384 385 /** 386 * Returns the name of this instance of DTD. 387 */ 388 public String toString() 389 { 390 return name; 391 } 392 393 /** 394 * Creates and returns new attribute (not an attribute list). 395 * @param name the name of this attribute 396 * @param type the type of this attribute (FIXED, IMPLIED or 397 * REQUIRED from <code>DTDConstants</code>). 398 * @param modifier the modifier of this attribute 399 * @param default_value the default value of this attribute 400 * @param allowed_values the allowed values of this attribute. The multiple 401 * possible values in this parameter are supposed to be separated by 402 * '|', same as in SGML DTD <code><!ATTLIST </code>tag. This parameter 403 * can be null if no list of allowed values is specified. 404 * @param atts the previous attribute of this element. This is 405 * placed to the field 406 * {@link javax.swing.text.html.parser.AttributeList#next }, 407 * creating a linked list. 408 * @return The attributes. 409 */ 410 protected AttributeList defAttributeList(String name, int type, int modifier, 411 String default_value, 412 String allowed_values, 413 AttributeList atts 414 ) 415 { 416 AttributeList al = new AttributeList(name); 417 al.modifier = modifier; 418 al.value = default_value; 419 al.next = atts; 420 421 if (allowed_values != null) 422 { 423 StringTokenizer st = new StringTokenizer(allowed_values, " \t|"); 424 Vector<String> v = new Vector<String>(st.countTokens()); 425 426 while (st.hasMoreTokens()) 427 v.add(st.nextToken()); 428 429 al.values = v; 430 } 431 432 return al; 433 } 434 435 /** 436 * Creates a new content model. 437 * @param type specifies the BNF operation for this content model. 438 * The valid operations are documented in the 439 * {@link javax.swing.text.html.parser.ContentModel#type }. 440 * @param content the content of this content model 441 * @param next if the content model is specified by BNF-like 442 * expression, contains the rest of this expression. 443 * @return The newly created content model. 444 */ 445 protected ContentModel defContentModel(int type, Object content, 446 ContentModel next 447 ) 448 { 449 ContentModel model = new ContentModel(); 450 model.type = type; 451 model.next = next; 452 model.content = content; 453 454 return model; 455 } 456 457 /** 458 * Defines a new element and adds it to the element table. 459 * If the element alredy exists, 460 * overrides it settings with the specified values. 461 * @param name the name of the new element 462 * @param type the type of the element 463 * @param headless true if the element needs no starting tag 464 * @param tailless true if the element needs no closing tag 465 * @param content the element content. 466 * @param exclusions the elements that must be excluded from the 467 * content of this element, in all levels of the hierarchy. 468 * @param inclusions the elements that can be included as the 469 * content of this element. 470 * @param attributes the element attributes. 471 * @return the created or updated element. 472 */ 473 protected Element defElement(String name, int type, boolean headless, 474 boolean tailless, ContentModel content, 475 String[] exclusions, String[] inclusions, 476 AttributeList attributes 477 ) 478 { 479 // compute the bit sets 480 BitSet exclude = bitSet(exclusions); 481 BitSet include = bitSet(inclusions); 482 483 Element e = 484 defineElement(name, type, headless, tailless, content, exclude, include, 485 attributes 486 ); 487 488 return e; 489 } 490 491 /** 492 * Creates, intializes and adds to the entity table the new 493 * entity. 494 * @param name the name of the entity 495 * @param type the type of the entity 496 * @param data the data section of the entity 497 * @return the created entity 498 */ 499 protected Entity defEntity(String name, int type, String data) 500 { 501 Entity e = newEntity(name, type); 502 e.data = data.toCharArray(); 503 504 return e; 505 } 506 507 private void assignField(Element e) 508 { 509 String element_name = e.name; 510 try 511 { 512 // Assign the field via reflection. 513 Field f = getClass().getField(element_name.toLowerCase()); 514 if ((f.getModifiers() & Modifier.PUBLIC) != 0) 515 if ((f.getModifiers() & Modifier.STATIC) == 0) 516 if (f.getType().isAssignableFrom(e.getClass())) 517 f.set(this, e); 518 } 519 catch (IllegalAccessException ex) 520 { 521 unexpected(ex); 522 } 523 catch (NoSuchFieldException ex) 524 { 525 // This is ok. 526 } 527 528 // Some virtual machines may still lack the proper 529 // implementation of reflection. As the tag fields 530 // are not used anywhere in this implementation, 531 // (and this class is also rarely used by the end user), 532 // it may be better not to crash everything by throwing an error 533 // for each case when the HTML parsing is required. 534 catch (Throwable t) 535 { 536 // This VM has no reflection mechanism implemented! 537 if (t instanceof OutOfMemoryError) 538 throw (Error) t; 539 } 540 } 541 542 /** 543 * Create the bit set for this array of elements. 544 * The unknown elements are automatically defined and added 545 * to the element table. 546 * @param elements 547 * @return The bit set. 548 */ 549 private BitSet bitSet(String[] elements) 550 { 551 BitSet b = new BitSet(); 552 553 for (int i = 0; i < elements.length; i++) 554 { 555 Element e = getElement(elements [ i ]); 556 557 if (e == null) 558 e = newElement(elements [ i ]); 559 560 b.set(e.index); 561 } 562 563 return b; 564 } 565 566 /** 567 * Find the element with the given name in the element table. 568 * If not find, create a new element with this name and add to the 569 * table. 570 * @param name the name of the element 571 * @return the found or created element. 572 */ 573 private Element newElement(String name) 574 { 575 Element e = elementHash.get(name.toLowerCase()); 576 577 if (e == null) 578 { 579 e = new Element(); 580 e.name = name; 581 e.index = elements.size(); 582 elements.add(e); 583 elementHash.put(e.name.toLowerCase(), e); 584 assignField(e); 585 } 586 return e; 587 } 588 589 /** 590 * Creates and adds to the element table the entity with an 591 * unitialized data section. Used internally. 592 * @param name the name of the entity 593 * @param type the type of the entity, a bitwise combination 594 * of GENERAL, PARAMETER, SYSTEM and PUBLIC. 595 * 596 * @return the created entity 597 */ 598 private Entity newEntity(String name, int type) 599 { 600 Entity e = new Entity(name, type, null); 601 entityHash.put(e.name, e); 602 return e; 603 } 604 605 private void unexpected(Exception ex) 606 { 607 throw new Error("This should never happen, report a bug", ex); 608 } 609}