/*
 * Copyright 2008-2009 the original author or authors.
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
 * License for the specific language governing rights and limitations
 * under the License.
 */

package com.mtgi.analytics;

import static java.lang.Character.isLetter;
import static java.lang.Character.isLetterOrDigit;

import java.io.StringWriter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map.Entry;

import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

/**
 * Renders an {@link EventDataElement} tree as XML text through the StAX
 * {@link XMLStreamWriter} API.
 *
 * <p>An instance reuses a single internal {@link StringWriter} for every call to
 * {@link #serialize(EventDataElement, boolean)} and performs no synchronization on
 * it, so one instance must not be used by several threads at the same time.
 * The static name cache, by contrast, is guarded by its own monitor.
 */
public class EventDataElementSerializer {

    /** Zero-length text used to force an empty element closed; see {@link #serializeElement}. */
    private static final char[] DUMMY_TEXT = {};

    /** Name used when an input yields no usable XML name characters at all. */
    private static final String DEFAULT_NAME = "data";

    /** Maximum number of sanitized names retained in {@link #nameCache}. */
    private static final int NAME_CACHE_LIMIT = 10000;

    /**
     * Cache of raw name to sanitized XML name, to speed up processing. Insertion-ordered,
     * evicting the eldest entry once more than {@link #NAME_CACHE_LIMIT} entries are held.
     * All access must be synchronized on the map itself, which is why the field is final.
     */
    private static final HashMap<String, String> nameCache = new LinkedHashMap<String, String>() {
        private static final long serialVersionUID = 8470335497980720176L;

        @Override
        protected boolean removeEldestEntry(Entry<String, String> eldest) {
            return size() > NAME_CACHE_LIMIT;
        }
    };

    private final XMLOutputFactory factory;
    private final StringWriter buffer;

    /**
     * @param factory the StAX factory used to create a fresh stream writer for each
     *                call to {@link #serialize(EventDataElement, boolean)}
     */
    public EventDataElementSerializer(XMLOutputFactory factory) {
        this.buffer = new StringWriter();
        this.factory = factory;
    }

    /**
     * Serialize the given event data as a standalone XML document.
     *
     * @param data the root element to serialize
     * @param includeProlog if true, include an XML prolog; if not, just render a document fragment
     * @return the XML serialization, or null if <code>data</code> is null or
     *         <code>data.isNull()</code> is true
     * @throws RuntimeException wrapping any {@link XMLStreamException} raised by the writer
     */
    public String serialize(EventDataElement data, boolean includeProlog) {
        if (data == null || data.isNull()) {
            return null;
        }

        try {
            //serialize the element tree to our string buffer.
            XMLStreamWriter writer = factory.createXMLStreamWriter(buffer);
            try {
                if (includeProlog) {
                    writer.writeStartDocument();
                }
                serializeElement(writer, data);
                //invoked whether or not a prolog was written.
                writer.writeEndDocument();
                writer.flush();
            } finally {
                writer.close();
            }

            //return buffer contents.
            return buffer.toString();

        } catch (XMLStreamException ioe) {
            //this shouldn't happen, since the target output stream is a StringWriter, but
            //the compiler demands that we handle it.
            throw new RuntimeException("Error serializing XML data", ioe);
        } finally {
            //reset the internal buffer for next run, on success and on failure alike.
            buffer.getBuffer().setLength(0);
        }
    }

    /**
     * Recursively write a single element, its properties (as attributes), its text and
     * its children to <code>writer</code>. Elements for which <code>isNull()</code> is
     * true are skipped entirely; properties with a null value are omitted.
     *
     * @param writer the stream writer receiving the output
     * @param element the element to write
     * @throws XMLStreamException if the underlying writer fails
     */
    protected void serializeElement(XMLStreamWriter writer, EventDataElement element) throws XMLStreamException {

        if (element.isNull()) {
            return;
        }

        String name = getXMLElementName(element.getName());

        if (element.isEmpty()) {
            writer.writeEmptyElement(name);
            //TODO: remove when stax bug is fixed.
            //this is a workaround for a bug in the 1.2 StAX implementation, where
            //if the only element in your document is empty, the closing "/>" never gets written.
            //any other API call fixes the problem, so here we do a no-op string append to force
            //the element closed.
            writer.writeCharacters(DUMMY_TEXT, 0, 0);
            return;
        }

        writer.writeStartElement(name);

        //add attributes for properties.
        //NOTE(review): distinct property keys can sanitize to the same attribute name
        //(e.g. "a b" and "a-b" both become "a-b"), which would emit a duplicate
        //attribute -- confirm whether callers can produce such keys.
        Iterator<? extends Entry<String, Object>> props = element.iterateProperties();
        while (props.hasNext()) {
            Entry<String, Object> prop = props.next();
            Object value = prop.getValue();
            if (value != null) {
                writer.writeAttribute(getXMLElementName(prop.getKey()), value.toString());
            }
        }

        //add text to the element if applicable.
        String text = element.getText();
        if (text != null) {
            writer.writeCharacters(text);
        }

        //add child elements for children.
        Iterator<EventDataElement> children = element.iterateChildren();
        while (children.hasNext()) {
            serializeElement(writer, children.next());
        }
        writer.writeEndElement();
    }

    /**
     * Convert the given input text into a name usable for an XML element or attribute:
     * <ul>
     * <li>all letter characters are preserved</li>
     * <li>digit characters after the first letter character are preserved</li>
     * <li>leading non-letter characters are discarded</li>
     * <li>trailing non-letter/digit characters are discarded</li>
     * <li>all other sequences of non-letter/digit characters are converted to a single hyphen</li>
     * </ul>
     *
     * If the above conversion rules yield an empty string, or if <code>name</code> is
     * null, the static string "data" is returned instead. Results for non-null inputs
     * are stored in a bounded cache.
     */
    private static String getXMLElementName(String name) {

        //a null name has no characters to keep; treat it like any other unusable input.
        if (name == null) {
            return DEFAULT_NAME;
        }

        synchronized (nameCache) {
            String cached = nameCache.get(name);
            if (cached != null) {
                return cached;
            }
        }

        //accumulates return value. method-local, so no synchronized buffer is needed.
        StringBuilder buf = new StringBuilder(name.length());

        //recognizer state machine that chews up an arbitrary string and
        //spits out a valid XML element name. recognizer is always in one of three
        //states:
        //  'e'psilon while no characters are yet in the output,
        //  'i'nterior while there are some valid name characters,
        //  'h'yphenated while encountering invalid name characters

        char state = 'e';
        for (int i = 0; i < name.length(); ++i) {
            char c = name.charAt(i);

            switch (state) {
            case 'e':
                //beginning of string. ignore everything up to the first letter.
                if (isLetter(c)) {
                    buf.append(c);
                    state = 'i';
                }
                break;

            case 'i':
                //letters or digits ok after the first character.
                if (isLetterOrDigit(c)) {
                    buf.append(c);
                } else {
                    //invalid name character. emit one hyphen and absorb all invalid
                    //characters that follow by moving to the 'h'yphenated state.
                    buf.append('-');
                    state = 'h';
                }
                break;

            case 'h':
                //hyphenated state, absorb invalid characters.
                if (isLetterOrDigit(c)) {
                    //we have a valid character, back to 'i'nterior state.
                    buf.append(c);
                    state = 'i';
                }
                break;

            default:
                //unreachable: state only ever holds 'e', 'i' or 'h'.
                break;
            }
        }

        //strip trailing '-' from the generated name. at most one can be present, since
        //a run of invalid characters only ever contributes a single hyphen.
        int length = buf.length();
        if (length > 0 && buf.charAt(length - 1) == '-') {
            buf.setLength(--length);
        }

        //input was just numbers or other gobbledygook. return default value for element name.
        if (length == 0) {
            buf.append(DEFAULT_NAME);
        }

        //we have a usable name; remember it and return it.
        String ret = buf.toString();
        synchronized (nameCache) {
            nameCache.put(name, ret);
        }
        return ret;
    }

}