Sunday, 29 September 2019

Create Product sitemap in AEM

Creating a sitemap for an eCommerce website helps search engine crawlers index all of the product pages.

Let's see how to create a sitemap from a product index file.

Create a service config file
Create a scheduler
Create sitemap read and write service interface and implementation
Create models for parsing the index xml file

Below is a sample XML file hosted on a server. We will configure the URL of this hosted XML file in the scheduler configuration.
<aemquickstart
xmlns:xs="http://www.w3.org/2001/XMLSchema" version="2.0">
<channel>
<Item>
<title>
<![CDATA[ AEM Quickstart by Kishore ]]>
</title>
<ProductId>12345</ProductId>
<pubDate>02/28/2017 00:00:00.000000</pubDate>
</Item>
<Item>
<title>
<![CDATA[ Lorel Ipsum ]]>
</title>
<ProductId>56789</ProductId>
<pubDate>02/28/2019 00:00:00.000000</pubDate>
</Item>
<Item>
<title>
<![CDATA[ Create Sitemap in AEM ]]>
</title>
<ProductId>12987</ProductId>
<pubDate>03/28/2019 00:00:00.000000</pubDate>
</Item>
</channel>

</aemquickstart>


Create a service config file
package com.aemquickstart.core.configurations;

import org.osgi.service.metatype.annotations.AttributeDefinition;
import org.osgi.service.metatype.annotations.AttributeType;
import org.osgi.service.metatype.annotations.ObjectClassDefinition;

/**
 * OSGi metatype definition for the product sitemap scheduler.
 *
 * Each method below becomes one editable attribute of the
 * "AEM Quickstart Sitemap Configuration" entry.
 *
 * @author Kishore Polsani
 */
@ObjectClassDefinition(name = "AEM Quickstart Sitemap Configuration", description = "This configuration helps in creating a product sitemap, reading data from URL")
public @interface SitemapConfiguration {

    /** Name used to identify the scheduled job. */
    @AttributeDefinition(name = "Scheduler name", description = "Name of the scheduler", type = AttributeType.STRING)
    String name() default "XML Reader Scheduler";

    /** Whether the sitemap job should be scheduled and executed. */
    @AttributeDefinition(name = "Enabled", description = "Flag to enable/disable a scheduler", type = AttributeType.BOOLEAN)
    boolean enabled() default true;

    /** Cron expression handed to the scheduler when the job is registered. */
    @AttributeDefinition(name = "Cron expression", description = "Cron expression used by the scheduler", type = AttributeType.STRING)
    String cronExpression() default "0 * * * * ?";

    /** Path of a local XML file; no default is provided. */
    @AttributeDefinition(name = "XML file path", description = "Path of the XML file on the system", type = AttributeType.STRING)
    String xmlFilePath();

    /** URL of the hosted product index XML; no default is provided. */
    @AttributeDefinition(name = "XML product index file URL", description = "URL from where XML response is to be read", type = AttributeType.STRING)
    String xmlResponseURL();

    /** Repository path under which the sitemap file is stored. */
    @AttributeDefinition(name = "JCR path", description = "Path in the JCR to store data", type = AttributeType.STRING)
    String jcrPath() default "/content/aemquickstart/en";

    /** Scheme and host prepended to every product URL in the sitemap. */
    // The original description lost the element name ("to be used in  attribute");
    // the domain ends up inside the <loc> element of each sitemap entry.
    @AttributeDefinition(name = "Enter Domain", description = "Enter domain to be used in the <loc> element of each sitemap URL.", type = AttributeType.STRING)
    String domain() default "https://localhost";
}

Create a scheduler
package com.aemquickstart.core.schedulers;

import org.apache.sling.commons.scheduler.ScheduleOptions;
import org.apache.sling.commons.scheduler.Scheduler;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Deactivate;
import org.osgi.service.component.annotations.Modified;
import org.osgi.service.component.annotations.Reference;
import org.osgi.service.metatype.annotations.Designate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.aemquickstart.core.configurations.SitemapConfiguration;
import com.aemquickstart.core.models.ProductList;
import com.aemquickstart.core.services.SitemapXmlReaderService;
import com.aemquickstart.core.services.SitemapXmlWriterService;

/**
 * Scheduled job that reads the product index XML from the configured URL and
 * hands the parsed products to the sitemap writer.
 *
 * The job is registered with the Sling {@link Scheduler} on activation and on
 * every configuration change, and is removed again on deactivation.
 *
 * @author Kishore Polsani
 */
@Component(immediate = true, service = Runnable.class)
@Designate(ocd = SitemapConfiguration.class)
public class SitemapScheduler implements Runnable {

    private final Logger log = LoggerFactory.getLogger(this.getClass());

    /**
     * Name under which the job is registered with the scheduler. The same
     * value must be used for scheduling and unscheduling, otherwise the old
     * job is never removed.
     */
    private String schedulerName;

    @Reference
    private Scheduler scheduler;

    @Reference
    private SitemapXmlReaderService sitemapXmlReaderService;

    @Reference
    private SitemapXmlWriterService sitemapXmlWriterService;

    /** Configured local XML file path; kept from the configuration but not used by {@link #run()}. */
    private String filePath;

    /** URL from which the product index XML is read. */
    private String productIndexFileUrl;

    private boolean isEnabled;

    private String jcrPath;

    private String domain;

    /**
     * Caches the configuration and registers the job with the scheduler.
     *
     * @param sitemapXmlReaderConfiguration the component configuration
     */
    @Activate
    protected void activate(SitemapConfiguration sitemapXmlReaderConfiguration) {
        applyConfiguration(sitemapXmlReaderConfiguration);
        log.info("Scheduler activated: flag={}", isEnabled);
        // Without this call the job would only ever be scheduled after the
        // configuration had been modified at least once.
        addScheduler(sitemapXmlReaderConfiguration);
    }

    /**
     * Re-registers the job after a configuration change.
     *
     * @param sitemapXmlReaderConfiguration the updated component configuration
     */
    @Modified
    protected void modified(SitemapConfiguration sitemapXmlReaderConfiguration) {
        // Remove the job under its OLD name before the cached name is replaced.
        removeScheduler();
        // Refresh every cached value so that enabled flag, URL, path and domain
        // changes take effect, not only the scheduler name.
        applyConfiguration(sitemapXmlReaderConfiguration);
        addScheduler(sitemapXmlReaderConfiguration);
    }

    /**
     * Removes the job from the scheduler when the component is deactivated.
     *
     * @param sitemapXmlReaderConfiguration the component configuration (unused)
     */
    @Deactivate
    protected void deactivate(SitemapConfiguration sitemapXmlReaderConfiguration) {
        removeScheduler();
    }

    /**
     * Copies all values of the given configuration into the fields of this component.
     *
     * @param configuration the configuration to read from
     */
    private void applyConfiguration(SitemapConfiguration configuration) {
        schedulerName = configuration.name();
        filePath = configuration.xmlFilePath();
        productIndexFileUrl = configuration.xmlResponseURL();
        isEnabled = configuration.enabled();
        jcrPath = configuration.jcrPath();
        domain = configuration.domain();
    }

    /**
     * Unschedules the job registered under the currently cached scheduler name, if any.
     */
    private void removeScheduler() {
        if (schedulerName == null) {
            return;
        }
        log.info("Removing scheduler: {}", schedulerName);
        scheduler.unschedule(schedulerName);
    }

    /**
     * Registers this component as a cron job when the configuration enables it.
     *
     * @param xmlReaderConfiguration configuration supplying the cron expression
     */
    private void addScheduler(SitemapConfiguration xmlReaderConfiguration) {
        if (!isEnabled) {
            log.info("Sitemap Scheduler {} is disabled", schedulerName);
            return;
        }
        ScheduleOptions scheduleOptions = scheduler.EXPR(xmlReaderConfiguration.cronExpression());
        // Must match the name passed to unschedule() in removeScheduler().
        scheduleOptions.name(schedulerName);
        scheduleOptions.canRunConcurrently(false);
        if (scheduler.schedule(this, scheduleOptions)) {
            log.info("Sitemap Scheduler {} is added", schedulerName);
        } else {
            log.warn("Sitemap Scheduler {} could not be added", schedulerName);
        }
    }

    /**
     * Reads the product index from the configured URL and writes the product sitemap.
     * Does nothing when the job is disabled, no URL is configured, or the
     * product index could not be read.
     */
    @Override
    public void run() {
        log.info("In Scheduler run(), isEnabled:{}", isEnabled);
        if (!isEnabled) {
            log.info("Sitemap Scheduler is not enabled");
            return;
        }
        if (productIndexFileUrl == null || productIndexFileUrl.isEmpty()) {
            log.warn("No product index URL configured; sitemap is not generated");
            return;
        }
        log.info("Product XML URL: {}", productIndexFileUrl);
        ProductList productList = sitemapXmlReaderService.readXMLFromURL(productIndexFileUrl);
        if (productList == null) {
            // The reader returns null when fetching or parsing fails; do not
            // hand a null list to the writer.
            log.warn("Product index could not be read from {}; sitemap is not generated", productIndexFileUrl);
            return;
        }
        sitemapXmlWriterService.createProductSiteMap(domain, productList, jcrPath, "url");
    }
}


Create an interface to read the XML file
package com.aemquickstart.core.services;

import com.aemquickstart.core.models.ProductList;

/**
 * Service that loads the product index XML and maps it to the model classes.
 */
public interface SitemapXmlReaderService {

    /**
     * Reads the XML document served at the given URL and converts it into a
     * {@link ProductList}.
     *
     * @param responseURL URL of the product index XML
     * @return the parsed product list
     */
    ProductList readXMLFromURL(String responseURL);
}

Create an implementation class to read the XML file
package com.aemquickstart.core.services.impl;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Unmarshaller;

import org.osgi.service.component.annotations.Component;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.aemquickstart.core.models.ProductList;
import com.aemquickstart.core.services.SitemapXmlReaderService;

/**
 * Reads the product index XML over a URL connection and unmarshals it into a
 * {@link ProductList} with JAXB.
 */
@Component(immediate = true, service = SitemapXmlReaderService.class)
public class SitemapXmlReaderServiceImpl implements SitemapXmlReaderService {

    /** Connect and read timeout applied to the URL connection, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30 * 1000;

    // Logger
    private final Logger log = LoggerFactory.getLogger(this.getClass());

    /**
     * Downloads the XML at {@code responseURL} and unmarshals it.
     *
     * @param responseURL URL of the product index XML
     * @return the parsed product list, or {@code null} when the URL is empty,
     *         the content cannot be fetched, or the XML cannot be parsed
     */
    @Override
    public ProductList readXMLFromURL(String responseURL) {
        log.info("In readXMLFromURL");
        String xmlResponse = fetchXml(responseURL);
        if (xmlResponse == null || xmlResponse.isEmpty()) {
            // Nothing to parse; unmarshalling an empty string would only
            // produce a second, misleading JAXB error.
            log.warn("No XML content received from {}", responseURL);
            return null;
        }
        log.debug("xmlResponse: {}", xmlResponse);

        try {
            // Context and unmarshaller are kept local: this component is a
            // shared service instance and should not hold per-call state.
            // NOTE(review): the XML comes from a remote URL - confirm the JAXB
            // runtime in use does not resolve external entities.
            JAXBContext jaxbContext = JAXBContext.newInstance(ProductList.class);
            Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
            ProductList productList = (ProductList) unmarshaller.unmarshal(new StringReader(xmlResponse));
            log.info("ProductList: {}", productList);
            return productList;
        } catch (JAXBException e) {
            log.error("Unable to parse product index XML from {}", responseURL, e);
            return null;
        }
    }

    /**
     * Fetches the body served at the given URL as text.
     *
     * @param responseURL URL to read from
     * @return the response body, or {@code null} when the URL is empty or reading fails
     */
    private String fetchXml(String responseURL) {
        if (responseURL == null || responseURL.isEmpty()) {
            return null;
        }
        StringBuilder builder = new StringBuilder();
        try {
            URLConnection urlConnection = new URL(responseURL).openConnection();
            urlConnection.setConnectTimeout(TIMEOUT_MILLIS);
            urlConnection.setReadTimeout(TIMEOUT_MILLIS);
            // try-with-resources closes the reader (and the underlying stream)
            // on every path, including when reading throws.
            // UTF-8 is the XML default encoding; the platform default charset
            // would make decoding depend on the host configuration.
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(urlConnection.getInputStream(), Charset.forName("UTF-8")))) {
                char[] buffer = new char[4096];
                int read;
                while ((read = bufferedReader.read(buffer)) != -1) {
                    builder.append(buffer, 0, read);
                }
            }
        } catch (Exception e) {
            // Broad catch kept from the original: this is invoked from a
            // scheduled job and must not propagate failures to the scheduler.
            log.error("Unable to read product index XML from {}", responseURL, e);
            return null;
        }
        return builder.toString();
    }
}


Create a model class to parse the XML
package com.aemquickstart.core.models;

import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;

/**
 * JAXB model for the {@code aemquickstart} root element of the product index XML.
 */
@XmlAccessorType(XmlAccessType.FIELD)
@XmlRootElement(name = "aemquickstart")
public class ProductList {

    /** The {@code channel} child elements; initially a one-slot array holding {@code null}. */
    @XmlElement(name = "channel")
    private Channel[] channel = new Channel[1];

    /**
     * @return the channels of this product list
     */
    public Channel[] getChannel() {
        return this.channel;
    }

    /**
     * @param channel the channels to store
     */
    public void setChannel(Channel[] channel) {
        this.channel = channel;
    }
}

Create Channel.java
package com.aemquickstart.core.models;

import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;

/**
 * JAXB model for a {@code channel} element of the product index XML.
 */
@XmlRootElement(name = "channel")
@XmlAccessorType(XmlAccessType.FIELD)
public class Channel {

    /**
     * The {@code Item} child elements. The element name is spelled out
     * because it is capitalised in the XML file; initially a one-slot array
     * holding {@code null}.
     */
    @XmlElement(name = "Item")
    private Item[] items = new Item[1];

    /**
     * @return the items of this channel
     */
    public Item[] getItem() {
        return this.items;
    }

    /**
     * @param item the items to store
     */
    public void setItem(Item[] item) {
        this.items = item;
    }
}

Create Item.java to read all elements
package com.aemquickstart.core.models;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;

/**
 * JAXB model for a single {@code Item} element (one product) of the product index XML.
 */
@XmlAccessorType(XmlAccessType.FIELD)
@XmlRootElement(name = "Item")
public class Item {

    /** Value of the {@code ProductId} element (capitalised in the XML). */
    @XmlElement(name = "ProductId")
    private String productId;

    /** Value of the {@code title} element. */
    @XmlElement(name = "title")
    private String title;

    /** Value of the {@code pubDate} element, kept as the raw text. */
    @XmlElement(name = "pubDate")
    private String pubDate;

    /**
     * @return the product id
     */
    public String getProductId() {
        return this.productId;
    }

    /**
     * @param productId the product id to store
     */
    public void setProductId(String productId) {
        this.productId = productId;
    }

    /**
     * @return the product title
     */
    public String getTitle() {
        return this.title;
    }

    /**
     * @param title the product title to store
     */
    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * @return the publication date text
     */
    public String getPubDate() {
        return this.pubDate;
    }

    /**
     * @param pubDate the publication date text to store
     */
    public void setPubDate(String pubDate) {
        this.pubDate = pubDate;
    }
}

Create an interface to write the sitemap to XML file
package com.aemquickstart.core.services;

import com.aemquickstart.core.models.ProductList;

/**
 * Service that turns a parsed product list into a product sitemap.
 */
public interface SitemapXmlWriterService {

    /**
     * Creates the product sitemap for the given products.
     *
     * @param domain      domain prefix for the product URLs
     * @param productList products to list in the sitemap
     * @param jcrPath     repository path the sitemap belongs to
     * @param from        label describing where the product data came from
     */
    void createProductSiteMap(String domain, ProductList productList, String jcrPath, String from);
}

Create an implementation class to create the sitemap
package com.aemquickstart.core.services.impl;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Map;

import javax.jcr.Binary;
import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import javax.jcr.ValueFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.sling.api.resource.LoginException;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

import com.aemquickstart.core.models.Channel;
import com.aemquickstart.core.models.Item;
import com.aemquickstart.core.models.ProductList;
import com.aemquickstart.core.services.SitemapXmlWriterService;
import com.day.cq.wcm.api.Page;
import com.day.cq.wcm.api.PageManager;

/**
 * Builds a sitemap XML document from a {@link ProductList} and stores it as an
 * {@code nt:file} node named {@code sitemap_products.xml} below the configured page.
 */
@Component(immediate = true, service = SitemapXmlWriterService.class)
public class SitemapXmlWriterServiceImpl implements SitemapXmlWriterService {

    /** Name of the sitemap file node created below the target page. */
    private static final String SITEMAP_NODE_NAME = "sitemap_products.xml";

    /** Sub service name used to obtain the service resource resolver. */
    private static final String SUBSERVICE_NAME = "aemquickstartSubservice";

    // Logger
    private final Logger log = LoggerFactory.getLogger(this.getClass());

    // Injecting ResourceResolverFactory
    @Reference
    private ResourceResolverFactory resourceResolverFactory;

    /**
     * Opens a service resource resolver for the sitemap sub service.
     * The caller is responsible for closing it.
     *
     * @return a new service resource resolver
     * @throws LoginException if the service resolver cannot be obtained
     */
    private ResourceResolver getResourceResolver() throws LoginException {
        Map<String, Object> authenticationInfo = new HashMap<>();
        authenticationInfo.put(ResourceResolverFactory.SUBSERVICE, SUBSERVICE_NAME);
        return resourceResolverFactory.getServiceResourceResolver(authenticationInfo);
    }

    /**
     * Generates the product sitemap and stores it below {@code jcrPath}.
     * Nothing is written when the product list is {@code null}, the path does
     * not exist, or the path is not a page. Failures are logged, not thrown.
     *
     * @param domain      domain prefix for every product URL
     * @param productList products to list in the sitemap
     * @param jcrPath     path of the page below which the sitemap is stored
     * @param from        label describing the data source, used for logging only
     */
    @Override
    public void createProductSiteMap(String domain, ProductList productList, String jcrPath, String from) {
        log.info("createProductSiteMap: {}", from);
        if (productList == null) {
            log.warn("No product list supplied; sitemap is not generated");
            return;
        }

        // A single resolver is opened per call and always closed in finally.
        // The JCR session is taken from that resolver and kept local, so no
        // state is shared between invocations of this service.
        ResourceResolver resourceResolver = null;
        try {
            resourceResolver = getResourceResolver();
            Session session = resourceResolver.adaptTo(Session.class);
            if (session == null) {
                log.error("Unable to obtain a JCR session; sitemap is not generated");
                return;
            }
            if (!session.itemExists(jcrPath)) {
                log.info("Provided path does not exist. Sitemap file can't be created under {}", jcrPath);
                return;
            }
            PageManager pgMgr = resourceResolver.adaptTo(PageManager.class);
            Page homepage = (pgMgr == null) ? null : pgMgr.getPage(jcrPath);
            if (homepage == null) {
                log.info("{} is not a page. Sitemap file is not created", jcrPath);
                return;
            }

            DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
            Document doc = docBuilder.newDocument();
            doc.setXmlStandalone(true);
            Element rootElement = doc.createElement("urlset");
            rootElement.setAttribute("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9");
            doc.appendChild(rootElement);
            addProductsToXml(domain, rootElement, jcrPath, doc, productList);

            String sitemapFile = jcrPath + "/" + SITEMAP_NODE_NAME;
            generateXmlFile(jcrPath, doc, session, sitemapFile);
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        } finally {
            // The resolver is null when obtaining it failed; closing the
            // resolver also releases the session adapted from it.
            if (resourceResolver != null && resourceResolver.isLive()) {
                resourceResolver.close();
            }
        }
    }

    /**
     * Appends one {@code url} element per product to the sitemap root.
     * Channels, items and product ids that are missing are skipped.
     *
     * @param domain      domain prefix for every product URL
     * @param rootElement the {@code urlset} element to append to
     * @param jcrPath     page path used as part of every product URL
     * @param doc         document used to create the elements
     * @param productList products to add; must not be {@code null}
     */
    private void addProductsToXml(String domain, Element rootElement, String jcrPath, Document doc,
            ProductList productList) {
        Channel[] channels = productList.getChannel();
        if (channels == null) {
            log.warn("Product list contains no channel; sitemap will be empty");
            return;
        }

        log.info("Setting properties");
        // Every entry carries the generation date, so format it once.
        FastDateFormat dateFormat = FastDateFormat.getInstance("yyyy-MM-dd");
        String lastModified = dateFormat.format(Calendar.getInstance().getTimeInMillis());

        for (Channel channel : channels) {
            Item[] products = (channel == null) ? null : channel.getItem();
            if (products == null) {
                continue;
            }
            for (Item product : products) {
                if (product == null) {
                    continue;
                }
                String productId = product.getProductId();
                if (productId == null || productId.isEmpty()) {
                    log.warn("Skipping product without ProductId");
                    continue;
                }
                String title = (product.getTitle() == null) ? "" : product.getTitle();

                Element pdpUrlElement = doc.createElement("url");

                Element pdpLoc = doc.createElement("loc");
                String pdpUrl = domain + jcrPath + "/pdp.html/" + title.replaceAll("[^a-zA-Z0-9-]", "") + "/"
                        + productId;
                pdpLoc.appendChild(doc.createTextNode(pdpUrl));
                pdpUrlElement.appendChild(pdpLoc);

                Element pdpLstMod = doc.createElement("lastmod");
                pdpLstMod.appendChild(doc.createTextNode(lastModified));
                pdpUrlElement.appendChild(pdpLstMod);

                // The sitemap protocol lists the changefreq values in lower case.
                Element pdpChangeFreq = doc.createElement("changefreq");
                pdpChangeFreq.appendChild(doc.createTextNode("weekly"));
                pdpUrlElement.appendChild(pdpChangeFreq);

                rootElement.appendChild(pdpUrlElement);
            }
        }
    }

    /**
     * Serializes the document and stores it as {@code sitemapFile}, replacing
     * an existing file of that name. Failures are logged, not thrown.
     *
     * @param jcrPath     path of the node below which the file is created
     * @param doc         sitemap document to store
     * @param session     JCR session used for all repository operations
     * @param sitemapFile absolute path of the sitemap file node
     */
    private void generateXmlFile(String jcrPath, Document doc, Session session, String sitemapFile) {
        Binary contentValue = null;
        try {
            log.info("sitemap file: {}", sitemapFile);

            // Serialize first: if this fails the existing sitemap stays untouched.
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            Source xmlSource = new DOMSource(doc);
            Result outputTarget = new StreamResult(outputStream);
            TransformerFactory.newInstance().newTransformer().transform(xmlSource, outputTarget);
            InputStream is = new ByteArrayInputStream(outputStream.toByteArray());

            // Removal of the old file and creation of the new one are
            // persisted by the single save() below.
            if (session.itemExists(sitemapFile)) {
                log.info("Sitemap exists");
                session.removeItem(sitemapFile);
            }

            ValueFactory valueFactory = session.getValueFactory();
            contentValue = valueFactory.createBinary(is);
            Node homepageNode = session.getNode(jcrPath);
            Node sitemapNode = homepageNode.addNode(SITEMAP_NODE_NAME, "nt:file");
            Node resNode = sitemapNode.addNode("jcr:content", "nt:resource");
            resNode.setProperty("jcr:data", contentValue);
            resNode.setProperty("jcr:mimeType", "text/xml");
            resNode.setProperty("jcr:lastModified", Calendar.getInstance());
            session.save();
            log.info("Sitemap is successfull created at {}", sitemapFile);
        } catch (RepositoryException rpe) {
            log.error("Repository error while storing sitemap {}", sitemapFile, rpe);
        } catch (Exception e) {
            log.error("Error while serializing or storing sitemap {}", sitemapFile, e);
        } finally {
            if (contentValue != null) {
                contentValue.dispose();
            }
        }
    }
}

Now build the project. Open configMgr and search for "AEM Quickstart Sitemap Configuration"


  • Enter Scheduler name and select enabled checkbox. 
  • Enter cron expression - to set how often the scheduler runs.
  • Enter XML file URL - where you would like to read the product info. 
  • Enter JCR path - where you need to upload your sitemap_products.xml file
  • Enter Domain - this value is used as the prefix of each product URL written to the <loc> element
Once the scheduler has run, the sitemap_products.xml file will be created under /content/aemquickstart/en







No comments :

Post a Comment