1   /****************************************************************************
2    **
3    ** This file is part of yFiles-2.9. 
4    ** 
5    ** yWorks proprietary/confidential. Use is subject to license terms.
6    **
7    ** Redistribution of this file or of an unauthorized byte-code version
8    ** of this file is strictly forbidden.
9    **
10   ** Copyright (c) 2000-2011 by yWorks GmbH, Vor dem Kreuzberg 28, 
11   ** 72070 Tuebingen, Germany. All rights reserved.
12   **
13   ***************************************************************************/
14  package demo.layout.genealogy.iohandler;
15  
16  import org.xml.sax.XMLReader;
17  import org.xml.sax.Locator;
18  import org.xml.sax.ContentHandler;
19  import org.xml.sax.ErrorHandler;
20  import org.xml.sax.EntityResolver;
21  import org.xml.sax.DTDHandler;
22  import org.xml.sax.SAXException;
23  import org.xml.sax.InputSource;
24  import org.xml.sax.SAXParseException;
25  import org.xml.sax.SAXNotRecognizedException;
26  import org.xml.sax.helpers.AttributesImpl;
27  import org.xml.sax.helpers.DefaultHandler;
28  
29  import java.util.Locale;
30  import java.util.Stack;
31  import java.io.IOException;
32  import java.io.BufferedReader;
33  import java.net.URL;
34  
35  /**
36  *  gedcom.gedml.GedcomParser
37  *
38  *  This class is designed to look like a SAX2-compliant XML parser; however,
39  *  it takes GEDCOM as its input rather than XML.
40  *  The events sent to the ContentHandler reflect the GEDCOM input "as is";
41  *  there is no validation or conversion of tags.<br/>
42   * http://homepage.ntlworld.com/michael.h.kay/gedml/index.html
43  *
44  *  @version 21 January 2001 - revised to conform to SAX2
45  */
46  
47  
48  
49  public class GedcomParser implements XMLReader, Locator {
50  
51      private ContentHandler contentHandler;
52      private ErrorHandler errorHandler;
53      private AttributesImpl emptyAttList = new AttributesImpl();
54      private AttributesImpl attList = new AttributesImpl();
55  
56      private String systemId;
57      private int lineNr;
58  
59      /**
60      * Set the ContentHandler
61      * @param handler User-supplied content handler
62      */
63  
64      public void setContentHandler(ContentHandler handler) {
65          contentHandler = handler;
66      }
67  
68      /**
69      * Get the ContentHandler
70      */
71  
72      public ContentHandler getContentHandler() {
73          return contentHandler;
74      }
75  
76      /**
77      * Set the entityResolver.
78      * This call has no effect, because entities are not used in GEDCOM files.
79      */
80  
81      public void setEntityResolver(EntityResolver er) {}
82  
83      /**
84      * Get the entityResolver
85      */
86  
87      public EntityResolver getEntityResolver() {
88          return null;
89      }
90  
91      /**
92      * Set the DTDHandler
93      * This call has no effect, because DTDs are not used in GEDCOM files.
94      */
95  
96      public void setDTDHandler(DTDHandler dh) {}
97  
98      /**
99      * Get the DTDHandler
100     */
101 
102     public DTDHandler getDTDHandler() {
103         return null;
104     }
105 
106     /**
107     * Set the error handler
108     * @param eh A user-supplied error handler
109     */
110 
111     public void setErrorHandler(ErrorHandler eh) {
112         errorHandler = eh;
113     }
114 
115     /**
116     * Get the error handler
117     */
118 
119     public ErrorHandler getErrorHandler() {
120         return errorHandler;
121     }
122 
123     /**
124     * Set the locale.
125     * This call has no effect: locales are not supported.
126     */
127 
128     public void setLocale(Locale locale) {}
129 
130     /**
131     * Parse input from the supplied systemId
132     */
133 
134     public void parse(String systemId) throws SAXException, IOException {
135         this.systemId = systemId;
136         parse( new BufferedReader(
137                     new AnselInputStreamReader(
138                         (new URL(systemId)).openStream() ) ) );
139     }
140 
141     /**
142     * Parse input from the supplied InputSource
143     */
144 
145     public void parse(InputSource source) throws SAXException, IOException {
146 
147         if (contentHandler==null) contentHandler = new DefaultHandler();
148         if (errorHandler==null) errorHandler = new DefaultHandler();
149         systemId = source.getSystemId();
150 
151         if (source.getCharacterStream()!=null) {
152             parse( new BufferedReader(source.getCharacterStream()) );
153         } else if (source.getByteStream()!=null) {
154             parse( new BufferedReader (
155                   new AnselInputStreamReader(
156                       source.getByteStream() ) ) );
157         } else if (systemId!=null) {
158             parse( systemId );
159         } else {
160             throw new SAXException("No input supplied");
161         }
162     }
163 
164     /**
165     * Parse input from a supplied BufferedReader
166     */
167 
168     private void parse(BufferedReader reader) throws SAXException, IOException {
169 
170         String line, currentLine, token1, token2;
171         String level;
172         int thislevel;
173         int prevlevel;
174         String iden, tag, xref, valu, type;
175         int cpos1;
176         int cpos2;
177         int i;
178 
179         char[] newlineCharArray = new char[1];
180         newlineCharArray[0] = '\n';
181 
182         lineNr = 0;
183         currentLine = "";
184 
185         Stack stack = new Stack();
186         stack.push("GED");
187 
188         prevlevel = -1;
189 
190         contentHandler.setDocumentLocator(this);
191         contentHandler.startDocument();
192         contentHandler.startElement("", "GED", "GED", emptyAttList);
193 
194         try {
195 
196           while ( (line=reader.readLine() ) != null ) {
197 
198             line=line.trim().replace('"', '\'');
199 
200             lineNr++;
201             currentLine = line;
202 
203             // parse the GEDCOM line into five fields: level, iden, tag, xref, valu
204 
205             if (line.length() > 0) {
206                 cpos1 = line.indexOf(' ');
207                 if (cpos1<0) throw new SAXException("No space in line");
208 
209                 level = GedcomParser.firstWord(line);
210                 do {
211                   try {
212                       thislevel = Integer.parseInt(level);
213                   } catch (NumberFormatException err) {
214                     thislevel = -1;
215                     level = level.substring(1);
216                   }
217                 } while (thislevel < 0 && level.length() > 0);
218                 if (thislevel < 0) {
219                     throw new SAXException("Level number is not an integer");
220                 }
221 
222                 // check the level number
223 
224                 if (thislevel>prevlevel && !(thislevel==prevlevel+1))
225                     throw new SAXException("Level numbers must increase by 1");
226                 if (thislevel<0)
227                     throw new SAXException("Level number must not be negative");
228 
229                 line = GedcomParser.remainder(line);
230                 token1 = GedcomParser.firstWord(line);
231                 line = GedcomParser.remainder(line);
232 
233                 if (token1.startsWith("@")) {
234                     if (token1.length()==1 || !token1.endsWith("@"))
235                         throw new SAXException("Bad xref_id");
236 
237                     iden = token1.substring(1, token1.length() - 1);
238                     tag = GedcomParser.firstWord(line);
239                     line = GedcomParser.remainder(line);
240                 } else {
241                     iden = "";
242                     tag = token1;
243                 };
244 
245                 xref = "";
246                 if ( line.startsWith("@")) {
247                     token2 = GedcomParser.firstWord(line);
248                     if (token2.length()==1 || !token2.endsWith("@"))
249                         throw new SAXException("Bad pointer value");
250 
251                     xref = token2.substring(1, token2.length() - 1);
252                     line = GedcomParser.remainder(line);
253                 };
254 
255                 valu = line;
256 
257                 // perform validation on the CHAR field (character code)
258                 if (tag.equals("CHAR") &&
259                     !(valu.trim().equals("ANSEL") || valu.trim().equals("ASCII")))
260                 {
261                     System.err.println("WARNING: Character set is " + valu + ": should be ANSEL or ASCII");
262                 }
263 
264                 // insert any necessary closing tags
265                 while (thislevel <= prevlevel) {
266                     String endtag = (String)stack.pop();
267                     contentHandler.endElement("", endtag, endtag);
268                     prevlevel--;
269                 }
270 
271                 if (!tag.equals("TRLR")) {
272                     attList.clear();
273                     if (!iden.equals("")) attList.addAttribute("", "ID", "ID", "ID", iden);
274                     if (!xref.equals("")) attList.addAttribute("", "REF", "REF", "IDREF", xref);
275                     contentHandler.startElement("", tag, tag, attList);
276                   stack.push(tag);
277                   prevlevel = thislevel;
278                 }
279 
280                 if (valu.length()>0) {
281                     contentHandler.characters(valu.toCharArray(), 0, valu.length());
282                 }
283           }
284 
285           } // end while
286 
287           contentHandler.endElement("", "GED", "GED");
288           contentHandler.endDocument();
289           //System.err.println("Parsing complete: " + lineNr + " lines");
290 
291         } catch (SAXException e1) {
292             SAXParseException err = new SAXParseException(e1.getMessage(), this);
293             errorHandler.fatalError(err);
294             throw err;
295         } finally {
296             reader.close();
297         }
298 
299     };
300 
301     /**
302     * Set a feature
303     */
304 
305     public void setFeature(String s, boolean b) throws SAXNotRecognizedException {
306         if (s.equals("http://xml.org/sax/features/namespaces") && b) return;
307         if (s.equals("http://xml.org/sax/features/namespace-prefixes") && !b) return;
308         throw new SAXNotRecognizedException("Gedcom Parser does not recognize any features");
309     }
310 
311     /**
312     * Get a feature
313     */
314 
315     public boolean getFeature(String s) throws SAXNotRecognizedException {
316         if (s.equals("http://xml.org/sax/features/namespaces")) return true;
317         if (s.equals("http://xml.org/sax/features/namespace-prefixes")) return false;
318         throw new SAXNotRecognizedException("Gedcom Parser does not recognize any features");
319     }
320 
321     /**
322     * Set a property
323     */
324 
325     public void setProperty(String s, Object b) throws SAXNotRecognizedException {
326         throw new SAXNotRecognizedException("Gedcom Parser does not recognize any properties");
327     }
328 
329     /**
330     * Get a property
331     */
332 
333     public Object getProperty(String s) throws SAXNotRecognizedException {
334         throw new SAXNotRecognizedException("Gedcom Parser does not recognize any properties");
335     }
336 
337 
338   /**
339     * Procedure to return the first word in a string
340     */
341     private static String firstWord(String inp)
342     {
343         int i;
344         i = inp.indexOf(' ');
345         if (i==0) return GedcomParser.firstWord(inp.trim());
346         if (i<0) return inp;
347         return inp.substring(0,i).trim();
348     }
349 
350   /**
351     * Procedure to return the text after the first word in a string
352     */
353 
354     private static String remainder(String inp)
355     {
356         final int i = inp.indexOf(' ');
357         if (i==0) {
358             return GedcomParser.remainder(inp.trim());
359         } else if (i < 0) {
360             return "";
361         }
362         return inp.substring(i+1,inp.length()).trim();
363     }
364 
365     /**
366     * Get the publicId: always null
367     */
368 
369     public String getPublicId() {
370         return null;
371     }
372 
373     /**
374     * Get the system ID
375     */
376 
377     public String getSystemId() {
378         return systemId;
379     }
380 
381     /**
382     * Get the line number
383     */
384 
385     public int getLineNumber() {
386         return lineNr;
387     }
388 
389     /**
390     * Get the column number: always -1
391     */
392 
393     public int getColumnNumber() {
394         return -1;
395     }
396 
397 }
398