001 package com.croftsoft.core.text.sml;
002
003 import java.io.*;
004 import java.util.zip.ZipInputStream;
005
006 import com.croftsoft.core.lang.NullArgumentException;
007 import com.croftsoft.core.text.sml.SmlNode;
008 import com.croftsoft.core.util.consumer.Consumer;
009
010 /*********************************************************************
011 * Used to parse large SML files one data record at a time.
012 *
013 * <p>
014 * The input is assumed to be a very large Simplified Markup Language
015 * (SML) file consisting of a root node containing zero or more child
016 * nodes, each child representing an individual data record. As each
017 * direct child of the top-level root element is parsed, it is passed
018 * as an SmlNode to a Consumer.
019 * </p>
020 *
021 * <p>
022 * This parser is useful when you have a database dump in SML
023 * format and you want to read it back in one data record at a time.
024 * Since the data file is large, your Consumer implementation will
025 * typically commit each record to secondary storage (disk or database)
026 * as it is received. This prevents an out-of-memory condition that
027 * might result from loading the entire file into primary storage
028 * (memory) as a Document Object Model (DOM), an object graph composed
029 * of a root node and multiple child nodes, as it is being parsed.
030 * </p>
031 *
032 * <p>
033 * Example:
034 * <pre>
035 * SmlNodeLoader.parse ( smlInputStream,
036 * new Consumer ( )
037 * {
038 * public void consume ( Object o )
039 * {
040 * SmlNode smlNode = ( SmlNode ) o;
041 *
042 * User user = User.fromSmlNode ( smlNode );
043 *
044 * userDatabase.add ( user );
045 * }
046 * } );
047 * </pre>
048 * </p>
049 *
050 * <p>
051 * It is assumed that an SML node will never have both character
052 * data and SML nodes mixed together as immediate children. Given that
053 * assumption, this parser will overwrite a parsed String child with a
054 * subsequently parsed SmlNode child. Additionally, character data will
055 * not be recorded as a child once an SmlNode child is already in place.
056 * This is useful for preventing unnecessary white space between element
057 * tags in the SML file from being stored as character data.
058 * </p>
059 *
060 * @version
061 * 2001-05-18
062 * @since
063 * 2001-05-10
064 * @author
065 * <a href="https://www.croftsoft.com/">David W. Croft</a>
066 *********************************************************************/
067
068 public final class SmlNodeLoader
069 implements SmlParseHandler
070 //////////////////////////////////////////////////////////////////////
071 //////////////////////////////////////////////////////////////////////
072 {
073
074 private Consumer smlNodeConsumer;
075
076 private SmlNodeParseHandler smlNodeParseHandler;
077
078 private int depth;
079
080 //////////////////////////////////////////////////////////////////////
081 //////////////////////////////////////////////////////////////////////
082
083 public static void main ( String [ ] args )
084 throws Exception
085 //////////////////////////////////////////////////////////////////////
086 {
087 parse (
088 args [ 0 ],
089 new Consumer ( )
090 {
091 public void consume ( Object o )
092 {
093 System.out.println ( o );
094
095 System.out.println ( "" );
096 }
097 },
098 args [ 0 ].toLowerCase ( ).endsWith ( ".zip" ) );
099 }
100
101 //////////////////////////////////////////////////////////////////////
102 //////////////////////////////////////////////////////////////////////
103
104 public static void parse (
105 InputStream inputStream,
106 Consumer smlNodeConsumer )
107 throws IOException
108 //////////////////////////////////////////////////////////////////////
109 {
110 SmlParseHandler smlParseHandler
111 = new SmlNodeLoader ( smlNodeConsumer );
112
113 SmlNodeLib.parse ( inputStream, smlParseHandler );
114 }
115
116 public static void parse (
117 String smlDataFilename,
118 Consumer smlNodeConsumer,
119 boolean isZipFile )
120 throws IOException
121 //////////////////////////////////////////////////////////////////////
122 {
123 InputStream inputStream = null;
124
125 try
126 {
127 inputStream = new BufferedInputStream (
128 new FileInputStream ( smlDataFilename ) );
129
130 if ( isZipFile )
131 {
132 ZipInputStream zipInputStream
133 = new ZipInputStream ( inputStream );
134
135 zipInputStream.getNextEntry ( );
136
137 inputStream = zipInputStream;
138 }
139
140 parse ( inputStream, smlNodeConsumer );
141 }
142 finally
143 {
144 if ( inputStream != null )
145 {
146 inputStream.close ( );
147 }
148 }
149 }
150
151 //////////////////////////////////////////////////////////////////////
152 //////////////////////////////////////////////////////////////////////
153
154 private SmlNodeLoader ( Consumer smlNodeConsumer )
155 //////////////////////////////////////////////////////////////////////
156 {
157 NullArgumentException.check (
158 this.smlNodeConsumer = smlNodeConsumer );
159
160 smlNodeParseHandler = new SmlNodeParseHandler ( );
161 }
162
163 //////////////////////////////////////////////////////////////////////
164 //////////////////////////////////////////////////////////////////////
165
166 public void handleCData ( String cData )
167 //////////////////////////////////////////////////////////////////////
168 {
169 smlNodeParseHandler.handleCData ( cData );
170 }
171
172 public void handleElementOpen ( String elementName )
173 //////////////////////////////////////////////////////////////////////
174 {
175 ++depth;
176
177 smlNodeParseHandler.handleElementOpen ( elementName );
178 }
179
180 public void handleElementClose ( String elementName )
181 //////////////////////////////////////////////////////////////////////
182 {
183 --depth;
184
185 if ( depth == 1 )
186 {
187 smlNodeConsumer.consume ( smlNodeParseHandler.getSmlNode ( ) );
188
189 smlNodeParseHandler.handleElementClose ( elementName );
190
191 smlNodeParseHandler.getSmlNode ( ).removeChildren ( );
192 }
193 else
194 {
195 smlNodeParseHandler.handleElementClose ( elementName );
196 }
197 }
198
199 public void handleParseError ( )
200 //////////////////////////////////////////////////////////////////////
201 {
202 smlNodeParseHandler.handleParseError ( );
203 }
204
205 //////////////////////////////////////////////////////////////////////
206 //////////////////////////////////////////////////////////////////////
207 }