001    /* 
002     * Copyright 2008-2009 the original author or authors.
003     * The contents of this file are subject to the Mozilla Public License
004     * Version 1.1 (the "License"); you may not use this file except in
005     * compliance with the License. You may obtain a copy of the License at
006     * http://www.mozilla.org/MPL/
007     *
008     * Software distributed under the License is distributed on an "AS IS"
009     * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
010     * License for the specific language governing rights and limitations
011     * under the License.
012     */
013     
014    package com.mtgi.analytics;
015    
016    import static java.lang.Character.isLetter;
017    import static java.lang.Character.isLetterOrDigit;
018    
019    import java.io.StringWriter;
020    import java.util.HashMap;
021    import java.util.Iterator;
022    import java.util.LinkedHashMap;
023    import java.util.Map.Entry;
024    
025    import javax.xml.stream.XMLOutputFactory;
026    import javax.xml.stream.XMLStreamException;
027    import javax.xml.stream.XMLStreamWriter;
028    
029    public class EventDataElementSerializer {
030    
031            private static final char[] DUMMY_TEXT = {};
032            
033            private XMLOutputFactory factory;
034            private StringWriter buffer;
035            
036            public EventDataElementSerializer(XMLOutputFactory factory) {
037                    this.buffer = new StringWriter();
038                    this.factory = factory;
039            }
040            
041            /**
042             * Serialize the given event data as a standalone XML document
043             * @param includeProlog if true, include an XML prolog; if not, just render a document fragment
044             * @return the XML serialization, or null if <code>data</code> is null.
045             */
046            public String serialize(EventDataElement data, boolean includeProlog) {
047                    if (data == null || data.isNull())
048                            return null;
049                    
050                    try {
051                            //serialize the DOM to our string buffer.
052                            XMLStreamWriter writer = factory.createXMLStreamWriter(buffer);
053                            try {
054                                    if (includeProlog)
055                                            writer.writeStartDocument();
056                                    serializeElement(writer, data);
057                                    writer.writeEndDocument();
058                                    writer.flush();
059                            } finally {
060                                    writer.close();
061                            }
062                            
063                            //return buffer contents.
064                            return buffer.toString();
065                            
066                    } catch (XMLStreamException ioe) {
067                            //this shouldn't happen, since the target output stream is a StringWriter, but
068                            //the compiler demands that we handle it.
069                            throw new RuntimeException("Error serializing XML data", ioe);
070                    } finally {
071                            
072                            //reset the internal buffer for next run.
073                            buffer.getBuffer().setLength(0);
074                    }
075            }
076            
077            /**
078             * Recursively serialize a single element, appending it to DOM element <code>parent</code>.
079             */
080            protected void serializeElement(XMLStreamWriter writer, EventDataElement element) throws XMLStreamException {
081    
082                    if (!element.isNull()) {
083                            //create a new node for the element and append it to the parent.
084                            String name = getXMLElementName(element.getName());
085                            
086                            if (element.isEmpty()) {
087                                    writer.writeEmptyElement(name);
088                                    //TODO: remove when stax bug is fixed.
089                                    //this is a workaround for a bug in the 1.2 StAX implementation, where
090                                    //if the only element in your document is empty, the closing "/>" never gets written.
091                                    //any other API call fixes the problem, so here we do a no-op string append to force
092                                    //the element closed.
093                                    writer.writeCharacters(DUMMY_TEXT, 0, 0);
094                            } else {
095                                    writer.writeStartElement(name);
096            
097                                    //add attributes for properties.
098                                    Iterator<? extends Entry<String,Object>> props = element.iterateProperties();
099                                    while (props.hasNext()) {
100                                            Entry<String,Object> prop = props.next();
101                                            Object value = prop.getValue();
102                                            if (value != null) {
103                                                    String propName = getXMLElementName(prop.getKey());
104                                                    writer.writeAttribute(propName, value.toString());
105                                            }
106                                    }
107            
108                                    //add text to the element if applicable.
109                                    String text = element.getText();
110                                    if (text != null)
111                                            writer.writeCharacters(text);
112                                    
113                                    //add child elements for children.
114                                    Iterator<EventDataElement> children = element.iterateChildren();
115                                    while (children.hasNext())
116                                            serializeElement(writer, children.next());
117                                    writer.writeEndElement();
118                            }
119                    }
120            }
121            
122            /**
123             * Convert the given input text into a valid XML entity name:
124             * <ul>
125             * <li>all letter characters are preserved</li>
126             * <li>digit characters after the first letter character are preserved</li>
127             * <li>leading non-letter characters are discarded</li>
128             * <li>trailing non-letter/digit characters are discarded</li>
129             * <li>all other sequences of non-letter/digit characters are converted to hyphens</li>
130             * </ul>
131             * 
132             * If the above conversion rules yield an empty string, the static string "data" is
133             * returned instead.
134             */
135            private static String getXMLElementName(String name) {
136    
137                    synchronized (nameCache) {
138                            String cached = nameCache.get(name);
139                            if (cached != null)
140                                    return cached;
141                    }
142                    
143                    //accumulates return value.
144                    StringBuffer buf = new StringBuffer();
145    
146                    //recognizer state machine that chews up an arbitrary string and
147                    //spits out a valid XML element name.  recognizer is always in one of three
148                    //states:  
149                    //  'e'psilon while no characters are yet in the output, 
150                    //  'i'nterior while there are some valid name characters,
151                    //  'h'yphenated while encountering invalid name characters
152                    
153                    char state = 'e';
154                    for (int i = 0; i < name.length(); ++i) {
155                            char c = name.charAt(i);
156                            
157                            switch (state) {
158                            case 'e':
159                                    //beginning of string.  ignore everything up to the first character for the name.
160                                    if (isLetter(c)) {
161                                            //found a character, transition to 'i'nterior state.
162                                            buf.append(c);
163                                            state = 'i';
164                                    }
165                                    break;
166                                    
167                            case 'i':
168                                    //letters or digits ok after the first character.
169                                    if (isLetterOrDigit(c)) {
170                                            buf.append(c);
171                                    } else {
172                                            //invalid name character.  convert to hyphen and absorb all invalid characters
173                                            //that follow by falling into the 'h'yphenated state.
174                                            buf.append('-');
175                                            state = 'h';
176                                    }
177                                    break;
178                                    
179                            case 'h':
180                                    //hyphenated state, absorb invalid characters.
181                                    if (isLetterOrDigit(c)) {
182                                            //we have a valid character, back to 'i'interior state.
183                                            buf.append(c);
184                                            state = 'i';
185                                    }
186                                    break;
187                                    
188                            }
189                    }
190    
191                    //strip trailing '-' from the generated name.
192                    int length = buf.length();
193                    if (length > 0 && buf.charAt(length -1) == '-')
194                            buf.setLength(--length);
195                    
196                    //input was just numbers or other gobbledigook.  return default value for element name.
197                    if (length == 0)
198                            buf.append("data");
199                    
200                    //we have a usable name, return it.
201                    String ret = buf.toString();
202                    synchronized (nameCache) {
203                            nameCache.put(name, ret);
204                    }
205                    return ret;
206            }
207    
208            /** cache computed values to speed up processing */
209            private static HashMap<String,String> nameCache = new LinkedHashMap<String,String>() {
210                    private static final long serialVersionUID = 8470335497980720176L;
211                    @Override
212                    protected boolean removeEldestEntry(Entry<String, String> eldest) {
213                            return size() > 10000;
214                    }
215            };
216            
217    }