001 package com.croftsoft.core.text.sml; 002 003 import java.io.*; 004 import java.util.zip.ZipInputStream; 005 006 import com.croftsoft.core.lang.NullArgumentException; 007 import com.croftsoft.core.text.sml.SmlNode; 008 import com.croftsoft.core.util.consumer.Consumer; 009 010 /********************************************************************* 011 * Used to parse large SML files one data record at a time. 012 * 013 * <p> 014 * The input is assumed to be a very large Simplified Markup Language 015 * (SML) file consisting of a root node containing zero or more child 016 * nodes, each child representing an individual data record. As each 017 * direct child of the top-level root element is parsed, it is passed 018 * as an SmlNode to a Consumer. 019 * </p> 020 * 021 * <p> 022 * This parser is useful when you have a database dump in SML 023 * format and you want to read it back in one data record at a time. 024 * Since the data file is large, your Consumer implementation will 025 * typically commit each record to secondary storage (disk or database) 026 * as it is received. This prevents an out-of-memory condition that 027 * might result from loading the entire file into primary storage 028 * (memory) as a Document Object Model (DOM), an object graph composed 029 * of a root node and multiple child nodes, as it is being parsed. 030 * </p> 031 * 032 * <p> 033 * Example: 034 * <pre> 035 * SmlNodeLoader.load ( smlInputStream, 036 * new Consumer ( ) 037 * { 038 * public void consume ( Object o ) 039 * { 040 * SmlNode smlNode = ( SmlNode ) o; 041 * 042 * User user = User.fromSmlNode ( smlNode ); 043 * 044 * userDatabase.add ( user ); 045 * } 046 * } ); 047 * </pre> 048 * </p> 049 * 050 * <p> 051 * It is assumed that an SML node will have never have both character 052 * data and SML nodes mixed together as immediate children. Given that 053 * assumption, this parser will overwrite a parsed String child with a 054 * subsequently parsed SmlNode child. Additionally, character data will 055 * not be recorded as a child once an SmlNode child is already in place. 056 * This is useful for preventing unnecessary white space between element 057 * tags in the SML file from being stored as character data. 058 * </p> 059 * 060 * @version 061 * 2001-05-18 062 * @since 063 * 2001-05-10 064 * @author 065 * <a href="https://www.croftsoft.com/">David W. Croft</a> 066 *********************************************************************/ 067 068 public final class SmlNodeLoader 069 implements SmlParseHandler 070 ////////////////////////////////////////////////////////////////////// 071 ////////////////////////////////////////////////////////////////////// 072 { 073 074 private Consumer smlNodeConsumer; 075 076 private SmlNodeParseHandler smlNodeParseHandler; 077 078 private int depth; 079 080 ////////////////////////////////////////////////////////////////////// 081 ////////////////////////////////////////////////////////////////////// 082 083 public static void main ( String [ ] args ) 084 throws Exception 085 ////////////////////////////////////////////////////////////////////// 086 { 087 parse ( 088 args [ 0 ], 089 new Consumer ( ) 090 { 091 public void consume ( Object o ) 092 { 093 System.out.println ( o ); 094 095 System.out.println ( "" ); 096 } 097 }, 098 args [ 0 ].toLowerCase ( ).endsWith ( ".zip" ) ); 099 } 100 101 ////////////////////////////////////////////////////////////////////// 102 ////////////////////////////////////////////////////////////////////// 103 104 public static void parse ( 105 InputStream inputStream, 106 Consumer smlNodeConsumer ) 107 throws IOException 108 ////////////////////////////////////////////////////////////////////// 109 { 110 SmlParseHandler smlParseHandler 111 = new SmlNodeLoader ( smlNodeConsumer ); 112 113 SmlNodeLib.parse ( inputStream, smlParseHandler ); 114 } 115 116 public static void parse ( 117 String smlDataFilename, 118 Consumer smlNodeConsumer, 119 boolean isZipFile ) 120 throws IOException 121 ////////////////////////////////////////////////////////////////////// 122 { 123 InputStream inputStream = null; 124 125 try 126 { 127 inputStream = new BufferedInputStream ( 128 new FileInputStream ( smlDataFilename ) ); 129 130 if ( isZipFile ) 131 { 132 ZipInputStream zipInputStream 133 = new ZipInputStream ( inputStream ); 134 135 zipInputStream.getNextEntry ( ); 136 137 inputStream = zipInputStream; 138 } 139 140 parse ( inputStream, smlNodeConsumer ); 141 } 142 finally 143 { 144 if ( inputStream != null ) 145 { 146 inputStream.close ( ); 147 } 148 } 149 } 150 151 ////////////////////////////////////////////////////////////////////// 152 ////////////////////////////////////////////////////////////////////// 153 154 private SmlNodeLoader ( Consumer smlNodeConsumer ) 155 ////////////////////////////////////////////////////////////////////// 156 { 157 NullArgumentException.check ( 158 this.smlNodeConsumer = smlNodeConsumer ); 159 160 smlNodeParseHandler = new SmlNodeParseHandler ( ); 161 } 162 163 ////////////////////////////////////////////////////////////////////// 164 ////////////////////////////////////////////////////////////////////// 165 166 public void handleCData ( String cData ) 167 ////////////////////////////////////////////////////////////////////// 168 { 169 smlNodeParseHandler.handleCData ( cData ); 170 } 171 172 public void handleElementOpen ( String elementName ) 173 ////////////////////////////////////////////////////////////////////// 174 { 175 ++depth; 176 177 smlNodeParseHandler.handleElementOpen ( elementName ); 178 } 179 180 public void handleElementClose ( String elementName ) 181 ////////////////////////////////////////////////////////////////////// 182 { 183 --depth; 184 185 if ( depth == 1 ) 186 { 187 smlNodeConsumer.consume ( smlNodeParseHandler.getSmlNode ( ) ); 188 189 smlNodeParseHandler.handleElementClose ( elementName ); 190 191 smlNodeParseHandler.getSmlNode ( ).removeChildren ( ); 192 } 193 else 194 { 195 smlNodeParseHandler.handleElementClose ( elementName ); 196 } 197 } 198 199 public void handleParseError ( ) 200 ////////////////////////////////////////////////////////////////////// 201 { 202 smlNodeParseHandler.handleParseError ( ); 203 } 204 205 ////////////////////////////////////////////////////////////////////// 206 ////////////////////////////////////////////////////////////////////// 207 }