Total members 11889 |It is currently Fri Mar 29, 2024 10:32 am Login / Join Codemiles

Java

C/C++

PHP

C#

HTML

CSS

ASP

Javascript

JQuery

AJAX

XSD

Python

Matlab

R Scripts

Weka





Read XML file content using SAX and writing its content as SQL queries on a file. in this example am using SVN log xml.
java code
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package bigdatasvn;

import javax.xml.parsers.SAXParser;
import org.xml.sax.Attributes;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Date;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
*
* @author mohamed
*/
public class BigDataSVN extends DefaultHandler {

public PrintWriter writerIndexLog;
public PrintWriter qeueryIndexLog;
public long counter=0;
public long pathCounter=0;
private Connection connect = null;
private Statement statement = null;
private PreparedStatement preparedStatement = null;
private ResultSet resultSet = null;
private SVNLog svnLog;
FilePath filePath;
String logXmlFileName;
String tmpValue;

/**
* @param args the command line arguments
*/
public BigDataSVN(String xmlFileName) throws Exception {
this.logXmlFileName = xmlFileName;
loadXMLFiles();
}


public static void main(String[] args) throws Exception {
//BigDataSVN bigDataSVN = new BigDataSVN("C:/website_3/Dec/asf.xml");
BigDataSVN bigDataSVN = new BigDataSVN("./asf.xml");

}

public void loadXMLFiles() throws Exception {
try {
Class.forName("com.mysql.jdbc.Driver");
// Setup the connection with the DB
connect = DriverManager
.getConnection("jdbc:mysql://localhost/cisc835_nasir?"
+ "user=cisc835_nasir&password=Replication123");

writerIndexLog = new PrintWriter("LogSvn.txt");
qeueryIndexLog = new PrintWriter("LogSvnQueries.sql");



loadXMLStream();

}




catch (SQLException ex) {
writerIndexLog.println(new Date()+" SQL ERROR :Index:"+counter+" : "+ex);
} catch (ClassNotFoundException ex) {
System.out.println(ex);
writerIndexLog.println(new Date()+":ClassNotFoundException: :Index:"+counter+ex);
} catch (FileNotFoundException ex) {
writerIndexLog.println(new Date()+":FileNotFoundException ::Index:"+counter+ex);

}
}

public void loadXMLStream() throws Exception {

SAXParserFactory factory = SAXParserFactory.newInstance();
try {
SAXParser parser = factory.newSAXParser();
parser.parse(logXmlFileName, this);
} catch (ParserConfigurationException e) {
writerIndexLog.println(new Date()+" SQL ERROR :Index:"+counter+" : "+e);

} catch (SAXException e) {
writerIndexLog.println(new Date()+" SAX ERROR :Index:"+counter+" : "+e);

} catch (IOException e) {
writerIndexLog.println(new Date()+" IOEXception ERROR :Index:"+counter+" : "+e);

}


}


@Override
public void startElement(String s, String s1, String elementName, Attributes attributes) throws SAXException {
// if current element is book , create new book
// clear tmpValue on start of element

if (elementName.equalsIgnoreCase("logentry")) {
svnLog = null;
svnLog = new SVNLog();
svnLog.setRevisionNumber(attributes.getValue("revision"));

}
// if current element is publisher
if (elementName.equalsIgnoreCase("path")) {
filePath=null;
filePath = new FilePath();
filePath.setAction(attributes.getValue("action"));
filePath.setKind(attributes.getValue("kind"));

}
}
@Override
public void endElement(String s, String s1, String element) throws SAXException {
// if end of book element add to list
if (element.equals("logentry")) {
//System.out.println(svnLog.toString());
try {


insertToDatabase();
//insert data
} catch (SQLException ex) {
Logger.getLogger(BigDataSVN.class.getName()).log(Level.SEVERE, null, ex);
System.out.println(ex);
} catch (Exception ex) {
Logger.getLogger(BigDataSVN.class.getName()).log(Level.SEVERE, null, ex);
System.out.println(ex);
}
}
if (element.equalsIgnoreCase("author")) {
svnLog.setAuthor(tmpValue);
}
if (element.equalsIgnoreCase("date")) {
svnLog.setCommitDate(tmpValue);
}
if(element.equalsIgnoreCase("msg")){
svnLog.setCommitMessage(tmpValue);

Pattern pattern = Pattern.compile("[A-Z]+[-][0-9]+");
Matcher matcher = pattern.matcher(svnLog.getCommitMessage());



if (matcher.find()) {
svnLog.setIssueID(matcher.group(0));
}
}
if(element.equalsIgnoreCase("path")){
filePath.setFilepath(tmpValue);
svnLog.getFilePaths().add(filePath);
}

}
@Override
public void characters(char[] ac, int i, int j) throws SAXException {
tmpValue = new String(ac, i, j);
}

public void insertToDatabase() throws SQLException, Exception {
try {
// This will load the MySQL driver, each DB has its own driver

// Statements allow to issue SQL queries to the database

// Result set get the result of the SQL query

if(counter%1000==0)
{
System.gc();
}
// PreparedStatements can use variables and are more efficient
preparedStatement = connect
.prepareStatement("insert into 'version_logs' values ("
+ "?,"
+ " ?,"
+ " ?,"
+ " ?, "
+ "? ,"
+ " ?"
+ ")");
// "myuser, webpage, datum, summary, COMMENTS from FEEDBACK.COMMENTS");
// Parameters start with 1
preparedStatement.setLong(1, ++counter);
preparedStatement.setString(2, svnLog.getIssueID());
preparedStatement.setString(3, svnLog.getRevisionNumber());
preparedStatement.setString(4, svnLog.getAuthor());
preparedStatement.setString(5, svnLog.getCommitDate());
preparedStatement.setString(6, svnLog.getCommitMessage());

qeueryIndexLog.println(preparedStatement.toString());



//preparedStatement.executeUpdate();




if (svnLog.getFilePaths().size() > 0) {

for (FilePath filePath : svnLog.getFilePaths()) {
preparedStatement = connect
.prepareStatement("insert into 'version_logs_filepaths' values ("
+ "?,"
+ " ?, "
+ " ?, "
+ " ?, "
+ "? "
+ ")");


preparedStatement.setLong(1, ++pathCounter);

preparedStatement.setLong(2, counter);

preparedStatement.setString(3, filePath.getKind());
preparedStatement.setString(4, filePath.getAction());
preparedStatement.setString(5, filePath.getFilepath());
qeueryIndexLog.println(preparedStatement.toString());
//preparedStatement.executeUpdate();


}


preparedStatement=null;

}


} catch (Exception e) {
throw e;
} finally {


}
}

public File[] getAllFiles(String path) {
File folder = new File(path);
File[] listOfFiles = folder.listFiles();


return listOfFiles;
}
}

Sample Example of the file :
xml code
<log>
<logentry
revision="1525673">
<author>bobby</author>
<date>2013-09-23T18:39:47.210043Z</date>
<paths>
<path
kind="file"
action="M">/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestQueue.java</path>
<path
kind="file"
action="M">/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt</path>
</paths>
<msg>svn merge -c 1525670 FIXES: MAPREDUCE-5522. Incorrect oreder expected from JobQueueInfo (Jinghui Wang via bobby)
</msg>
</logentry>
</log>




_________________
M. S. Rakha, Ph.D.
Queen's University
Canada


Author:
Mastermind
User avatar Posts: 2715
Have thanks: 74 time
Post new topic Reply to topic  [ 1 post ] 

  Related Posts  to : Read XML file content using SAX and writing its as SQL
 Read file content to StringBuffer String object     -  
 Reading and Writing To text file     -  
 Read the content from directory     -  
 display the content of text file     -  
 Load file and update content using AJAX and JQuery     -  
 Read csv file     -  
 read from file in C++     -  
 Read Binary File in C++     -  
 Read and write CSV file     -  
 read file in matlab     -  



Topic Tags

Java XML
cron





Powered by phpBB © 2000, 2002, 2005, 2007 phpBB Group
All copyrights reserved to codemiles.com 2007-2011
mileX v1.0 designed by codemiles team
Codemiles.com is a participant in the Amazon Services LLC Associates Program, an affiliate advertising program designed to provide a means for sites to earn advertising fees by advertising and linking to Amazon.com