import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;

/**
 * SAX content handler that scans an XML document for {@code Topic} and
 * {@code ExternalPage} elements and prints one tab-separated record per
 * external page to standard output.
 *
 * <p>Each printed record has the form
 * {@code pageId <TAB> uri <TAB> title <TAB> description <TAB> topicId}, where
 * {@code pageId} and {@code topicId} are running counters maintained by this
 * handler (they are not read from the document). Elements are matched on their
 * qualified names ({@code d:Title}, {@code d:Description}, attribute
 * {@code r:id}), so the handler expects a parser that reports qualified names,
 * as the default non-namespace-aware JAXP SAX parser does.
 *
 * <p>NOTE(review): the class name suggests the input is an Open Directory
 * Project RDF dump; this is not verifiable from the code itself.
 */
public class ODPParser extends DefaultHandler {

    // ---- Topic parsing state ------------------------------------------------

    /** True while the parser is between the start and end of a {@code Topic}. */
    boolean inTopic = false;
    /** True while the parser is inside a {@code catid} child of a {@code Topic}. */
    boolean inTopicCatid = false;
    /** Running count of topics seen so far that carried a non-empty {@code r:id}. */
    long dbTopicId = 0;
    /** Value of the {@code r:id} attribute of the most recent {@code Topic}, or null. */
    String dbTopicName;

    // ---- ExternalPage parsing state -----------------------------------------

    /** True while the parser is between the start and end of an {@code ExternalPage}. */
    boolean inExternalPage = false;
    boolean inExternalPageTitle = false;
    boolean inExternalPageDescription = false;
    boolean inExternalPageTopic = false;
    /** Running count of pages printed so far. */
    long dbPageId = 0;
    /** Value of the {@code about} attribute of the current page, or null. */
    String dbPageUri;
    /** Text of the current page's {@code d:Title}, or null if none was seen. */
    String dbPageTitle;
    /** Text of the current page's {@code d:Description}, or null if none was seen. */
    String dbPageDescription;
    /** Text of the current page's {@code topic}, or null if none was seen. */
    String dbPageTopic;

    /**
     * Tracks which element the parser has entered and resets the state that
     * belongs to that element, so values from a previous element never leak
     * into the current one.
     *
     * @param namespace  namespace URI (unused)
     * @param local      local name (unused)
     * @param element    qualified element name used for matching
     * @param attributes attributes of the element
     */
    @Override
    public void startElement(String namespace, String local, String element, Attributes attributes) {
        if ("Topic".equals(element)) {
            inTopic = true;
            // Assign unconditionally: a Topic without r:id must not inherit
            // the name of the previous Topic.
            dbTopicName = attributes.getValue("r:id");
        }
        if (inTopic && "catid".equals(element)) {
            inTopicCatid = true;
        }

        if ("ExternalPage".equals(element)) {
            inExternalPage = true;
            // Start every page from a clean slate so missing child elements
            // are reported as missing instead of reusing stale values.
            dbPageUri = attributes.getValue("about");
            dbPageTitle = null;
            dbPageDescription = null;
            dbPageTopic = null;
        }
        if (inExternalPage && "d:Title".equals(element)) {
            inExternalPageTitle = true;
            dbPageTitle = null; // text is accumulated from scratch in characters()
        }
        if (inExternalPage && "d:Description".equals(element)) {
            inExternalPageDescription = true;
            dbPageDescription = null;
        }
        if (inExternalPage && "topic".equals(element)) {
            inExternalPageTopic = true;
            dbPageTopic = null;
        }
    }

    /**
     * Leaves the element that just ended. Closing a {@code Topic} advances the
     * topic counter (when the topic had a non-empty name); closing an
     * {@code ExternalPage} prints the page record if it is complete.
     *
     * @param namespace namespace URI (unused)
     * @param local     local name (unused)
     * @param element   qualified element name used for matching
     */
    @Override
    public void endElement(String namespace, String local, String element) {
        if ("Topic".equals(element)) {
            inTopic = false;
            if (dbTopicName != null && !dbTopicName.isEmpty()) {
                dbTopicId++;
            }
        }
        if (inTopic && "catid".equals(element)) {
            inTopicCatid = false;
        }

        if ("ExternalPage".equals(element)) {
            inExternalPage = false;
            printCurrentPage();
            return; // nothing below can match once inExternalPage is false
        }
        if (inExternalPage && "d:Title".equals(element)) {
            inExternalPageTitle = false;
        }
        if (inExternalPage && "d:Description".equals(element)) {
            inExternalPageDescription = false;
        }
        if (inExternalPage && "topic".equals(element)) {
            inExternalPageTopic = false;
        }
    }

    /**
     * Collects character data for the title, description and topic of the
     * current page. The text inside {@code catid} is intentionally ignored;
     * topic ids are produced by the {@link #dbTopicId} counter instead.
     *
     * <p>A SAX parser may split one run of text into several calls, so each
     * chunk is appended to what has been collected so far rather than
     * replacing it.
     *
     * @param chars     buffer holding the characters
     * @param beginning index of the first character in {@code chars}
     * @param length    number of characters to read
     */
    @Override
    public void characters(char[] chars, int beginning, int length) {
        if (!inExternalPageTitle && !inExternalPageDescription && !inExternalPageTopic) {
            return; // avoid allocating a String for text nobody collects
        }
        String chunk = new String(chars, beginning, length);
        if (inExternalPageTitle) {
            dbPageTitle = append(dbPageTitle, chunk);
        }
        if (inExternalPageDescription) {
            dbPageDescription = append(dbPageDescription, chunk);
        }
        if (inExternalPageTopic) {
            dbPageTopic = append(dbPageTopic, chunk);
        }
    }

    /**
     * Command-line entry point: parses the single file (or system identifier)
     * given as argument and prints the page records to standard output.
     *
     * @param args exactly one element, the input passed to {@code XMLReader.parse}
     * @throws Exception if the parser cannot be created or parsing fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length == 1) {
            XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
            reader.setContentHandler(new ODPParser());
            reader.parse(args[0]);
        } else {
            // Usage message (Estonian): "Use as: java ODPParser datafile.xml".
            System.err.println("Kasuta kujul java ODPParser andmefail.xml");
            // NOTE(review): exit status 0 on a usage error is kept unchanged
            // for backward compatibility with existing callers.
            System.exit(0);
        }
    }

    /**
     * Prints the record for the page that just ended. Pages without a URI or
     * without a topic are skipped; a missing title or description is printed
     * as the literal text {@code NULL}.
     */
    private void printCurrentPage() {
        if (dbPageUri == null || dbPageUri.isEmpty()) {
            return;
        }
        if (dbPageTitle == null) {
            dbPageTitle = "NULL";
        }
        if (dbPageDescription == null) {
            dbPageDescription = "NULL";
        }
        if (dbPageTopic == null || dbPageTopic.isEmpty()) {
            return;
        }
        dbPageId++;
        System.out.println(dbPageId + "\t" + dbPageUri + "\t" + dbPageTitle + "\t"
                + dbPageDescription + "\t" + dbTopicId);
    }

    /** Returns {@code chunk} appended to {@code soFar}, treating null as "nothing collected yet". */
    private static String append(String soFar, String chunk) {
        return soFar == null ? chunk : soFar + chunk;
    }
}