xsmeral.semnet.crawler.model
Class CrawlerConfiguration

java.lang.Object
  extended by xsmeral.semnet.crawler.model.CrawlerConfiguration

@XStreamConverter(value=CrawlerConfigurationConverter.class)
public class CrawlerConfiguration
extends Object

Container for HTMLCrawler configuration.
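Contains the hosts to crawl, the relational DB layer, the number of threads per host, the global minimum crawl delay, and the flags controlling whether the robots policy is ignored and whether the referrer is faked.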


Field Summary
static boolean DEF_FAKE_REFERRER
           
static int DEF_GLOBAL_CRAWL_DELAY_MIN
           
static boolean DEF_POLICY_IGNORED
           
 
Constructor Summary
CrawlerConfiguration()
           
CrawlerConfiguration(Collection<HostDescriptor> hosts, RDBLayer dbLayer, int threadsPerHost, int globalCrawlDelayMinimum, boolean policyIgnored, boolean fakeReferrer)
          Initializes all fields
 
Method Summary
 RDBLayer getDBLayer()
          The relational DB layer used by the crawler for state persistence (URL storage)
 int getGlobalCrawlDelayMinimum()
          Minimum crawl delay in milliseconds
 Collection<HostDescriptor> getHosts()
          Hosts crawled by the crawler
 int getThreadsPerHost()
          Number of crawling threads per host
 boolean isFakeReferrer()
          Indicates whether the HTTP Referer header should be set to the base URL of the host
 boolean isPolicyIgnored()
          Indicates whether the Robots Exclusion Protocol policy is ignored (i.e. not adhered to)
 void setDBLayer(RDBLayer dbLayer)
          The relational DB layer used by the crawler for state persistence (URL storage)
 void setFakeReferrer(boolean fakeReferrer)
          Indicates whether the HTTP Referer header should be set to the base URL of the host
 void setGlobalCrawlDelayMinimum(int globalCrawlDelayMinimum)
          Minimum crawl delay in milliseconds
 void setHosts(Collection<HostDescriptor> hosts)
          Hosts crawled by the crawler
 void setPolicyIgnored(boolean policyIgnored)
          Indicates whether the Robots Exclusion Protocol policy is ignored (i.e. not adhered to)
 void setThreadsPerHost(int threadsPerHost)
          Number of crawling threads per host
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

DEF_GLOBAL_CRAWL_DELAY_MIN

public static final int DEF_GLOBAL_CRAWL_DELAY_MIN
See Also:
Constant Field Values

DEF_POLICY_IGNORED

public static final boolean DEF_POLICY_IGNORED
See Also:
Constant Field Values

DEF_FAKE_REFERRER

public static final boolean DEF_FAKE_REFERRER
See Also:
Constant Field Values
Constructor Detail

CrawlerConfiguration

public CrawlerConfiguration()

CrawlerConfiguration

public CrawlerConfiguration(Collection<HostDescriptor> hosts,
                            RDBLayer dbLayer,
                            int threadsPerHost,
                            int globalCrawlDelayMinimum,
                            boolean policyIgnored,
                            boolean fakeReferrer)
Initializes all fields

Method Detail

getDBLayer

public RDBLayer getDBLayer()
The relational DB layer used by the crawler for state persistence (URL storage)


setDBLayer

public void setDBLayer(RDBLayer dbLayer)
The relational DB layer used by the crawler for state persistence (URL storage)


isFakeReferrer

public boolean isFakeReferrer()
Indicates whether the HTTP Referer header should be set to the base URL of the host


setFakeReferrer

public void setFakeReferrer(boolean fakeReferrer)
Indicates whether the HTTP Referer header should be set to the base URL of the host


getGlobalCrawlDelayMinimum

public int getGlobalCrawlDelayMinimum()
Minimum crawl delay in milliseconds


setGlobalCrawlDelayMinimum

public void setGlobalCrawlDelayMinimum(int globalCrawlDelayMinimum)
Minimum crawl delay in milliseconds


getHosts

public Collection<HostDescriptor> getHosts()
Hosts crawled by the crawler


setHosts

public void setHosts(Collection<HostDescriptor> hosts)
Hosts crawled by the crawler


isPolicyIgnored

public boolean isPolicyIgnored()
Indicates whether the Robots Exclusion Protocol policy is ignored (i.e. not adhered to)

See Also:
RobotsPolicy

setPolicyIgnored

public void setPolicyIgnored(boolean policyIgnored)
Indicates whether the Robots Exclusion Protocol policy is ignored (i.e. not adhered to)

See Also:
RobotsPolicy

getThreadsPerHost

public int getThreadsPerHost()
Number of crawling threads per host


setThreadsPerHost

public void setThreadsPerHost(int threadsPerHost)
Number of crawling threads per host