Results 1-25 of about 131 results found for 'crawler' in 0.14 seconds
 1 2 3 4 5 6

server.py
# Python prototype for LinkSearch - http://linksearch.sourceforge.net
#
# "main" crawler/indexer program
#
# Copyright (C) 2000 Andreas Harth (aharth@users.sourceforge.net)
Language: Python
License: GPL
(C) 2000 Andreas Harth (aharth@users.sourceforge.net)
LOC: 109
SourceForge : LinkSearch: Google-like Search Engine (project search) : .../l/linksearch/linksearch/src/server.py

globals.py
# 
# crawler.py
#
# Python prototype for LinkSearch - http://linksearch.sourceforge.net
Language: Python
License: GPL
(C) 2000 Andreas Harth (aharth@users.sourceforge.net)
LOC: 24
SourceForge : LinkSearch: Google-like Search Engine (project search) : .../l/linksearch/linksearch/src/globals.py

factory.py
from anole.core.crawlhandler import CrawlHandler
from anole.core.fetcher import  Fetcher
from anole.core.crawler import  Crawler
from anole.core.processormgr import ProcessorMgr
import threading
Language: Python
LOC: 66
Google : anole-spider - a python spider (project search) : .../anole-spider/trunk/anole/core/factory.py

crawler.py
# -- coding: latin-1
""" crawler.py - Module which does crawling and downloading
    of urls from the web. This module is part of HarvestMan program.
    Rewrite this to use twisted.
Language: Python
LOC: 4
BerliOS : HarvestMan (project search) : .../harvestman/harvestman/HarvestMan-twisted/HarvestMan/crawler.py

urlqueue.py
# -- coding: latin-1
""" urlqueue.py - Module which controls queueing of urls
    created by crawler threads. This is part of the HarvestMan
    program.
    Rewrite this to use twisted.
Language: Python
LOC: 5
BerliOS : HarvestMan (project search) : .../harvestman/harvestman/HarvestMan-twisted/HarvestMan/urlqueue.py

montevideo.py
"""
Noti - crawler for http://www.montevideo.com.uy
"""
from datetime import datetime
Language: Python
LOC: 27
Google : noti - Noti es un framework de publicación... c... (project search) : .../notiuy/apps/noti/crawlers/montevideo.py

example.py
   # Create a Document instance representing start url
   doc= ruya.Document(ruya.Uri(url))
   # Create a new crawler configuration object
   cfg= ruya.Config(ruya.Config.CrawlConfig(levels= 1, crawldelay= 5), ruya.Config.RedirectConfig(), ruya.Config.LogConfig())
   # Use a single-domain breadth crawler with crawler configuration
Language: Python
LOC: 34
Spider_20090529_inc : Ruya (project search) : .../0015/ruya/ruya-1.0.zip/ruya-1.0/example.py

crawler.py
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin
class Crawler(object):
    user_agent = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1.7) Gecko/2007091417 Firefox/2.0.0.7"
    crawled_urls = {}
Language: Python
LOC: 33
Google : wiki-crawler - This is a web crawler speci...'s (project search) : .../Google/w/wiki-crawler/trunk/crawler.py

btqueue.py
    from BitQueue.optparse import OptionParser
from BitQueue import version
parser = OptionParser(usage='btqueue.py [-r|--root_path path] scheduler|crawler|remote|query|add [args...]',
                      version=version)
parser.add_option("-r","--root",dest="root_path",default=None,
Language: Python
LOC: 101
Spider_20090227_inc : BitTorrent Queue Manager (project search) : .../009/btqueue/BTQueue-0.1.3-489.2.4.src.rpm/BTQueue-0.1.3/btqueue.py

btqueue.py
    from BitQueue.optparse import OptionParser
from BitQueue import version
parser = OptionParser(usage='btqueue.py [-r|--root_path path] scheduler|crawler|remote|query|add [args...]',
                      version=version)
parser.add_option("-r","--root",dest="root_path",default=None,
Language: Python
LOC: 101
Spider_20090227_inc : BitTorrent Queue Manager (project search) : .../btqueue/BTQueue-0.1.3-489.2.4.noarch.rpm/usr/bin/btqueue.py

btqueue.py
    from BitQueue.optparse import OptionParser
from BitQueue import version
parser = OptionParser(usage='btqueue.py [-r|--root_path path] scheduler|crawler|remote|query|add [args...]',
                      version=version)
parser.add_option("-r","--root",dest="root_path",default=None,
Language: Python
LOC: 101
Spider_20090227_inc : BitTorrent Queue Manager (project search) : .../009/btqueue/BTQueue-0.1.3/btqueue.py

btqueue.py
    from BitQueue.optparse import OptionParser
from BitQueue import version
parser = OptionParser(usage='btqueue.py [-r|--root_path path] scheduler|crawler|remote|query|add [args...]',
                      version=version)
parser.add_option("-r","--root",dest="root_path",default=None,
Language: Python
LOC: 101
Spider_20090227_inc : BitTorrent Queue Manager (project search) : .../009/btqueue/BTQueue-0.1.3-489.2.3.src.rpm/BTQueue-0.1.3/btqueue.py

btqueue.py
    from BitQueue.optparse import OptionParser
from BitQueue import version
parser = OptionParser(usage='btqueue.py [-r|--root_path path] scheduler|crawler|remote|query|add [args...]',
                      version=version)
parser.add_option("-r","--root",dest="root_path",default=None,
Language: Python
LOC: 101
Spider_20090227_inc : BitTorrent Queue Manager (project search) : .../009/btqueue/BTQueue-0.1.3-489.2.2.src.rpm/BTQueue-0.1.3/btqueue.py

btqueue.py
    from BitQueue.optparse import OptionParser
from BitQueue import version
parser = OptionParser(usage='btqueue.py [-r|--root_path path] scheduler|crawler|remote|query|add [args...]',
                      version=version)
parser.add_option("-r","--root",dest="root_path",default=None,
Language: Python
LOC: 101
Spider_20090227_inc : BitTorrent Queue Manager (project search) : .../btqueue/BTQueue-0.1.3-489.2.3.noarch.rpm/usr/bin/btqueue.py

btqueue.py
    from BitQueue.optparse import OptionParser
from BitQueue import version
parser = OptionParser(usage='btqueue.py [-r|--root_path path] scheduler|crawler|remote|query|add [args...]',
                      version=version)
parser.add_option("-r","--root",dest="root_path",default=None,
Language: Python
LOC: 101
Spider_20090227_inc : BitTorrent Queue Manager (project search) : .../btqueue/BTQueue-0.1.3-489.2.2.noarch.rpm/usr/bin/btqueue.py

initiate.py
#spider
print 'spider'
spider.crawler(100)
print 'about to import indexer'
import indexer
Language: Python
LOC: 13
Spider_20081217_inc : Tyriel (project search) : .../inc_003/tyriel/tyriel-0.1.0.tgz/tyriel-0.1.0/initiate.py

ultimasnoticias.py
"""
Noti: crawler for http://www.utlimasnoticias.com.uy
"""
from urllib import urlopen
Language: Python
LOC: 41
Google : noti - Noti es un framework de publicación... c... (project search) : .../notiuy/apps/noti/crawlers/ultimasnoticias.py

elpais.py
"""
Noti: Crawler for www.elpais.com.uy
"""
from datetime import date
Language: Python
LOC: 31
Google : noti - Noti es un framework de publicación... c... (project search) : .../notiuy/apps/noti/crawlers/elpais.py

observa.py
"""
Noti - Crawler for http://observa.com.uy
"""
from datetime import datetime
Language: Python
LOC: 35
Google : noti - Noti es un framework de publicación... c... (project search) : .../notiuy/apps/noti/crawlers/observa.py

larepublica.py
"""
Noti - Crawler for http://www.larepublica.com.uy
"""
import re
Language: Python
LOC: 41
Google : noti - Noti es un framework de publicación... c... (project search) : .../notiuy/apps/noti/crawlers/larepublica.py

lessTrivialGame.py
    #   IN
    #   MM     Jeweled Torque
    #   PR     Silt Crawler
    #   Mirage Serpent
    #
Language: Python
LOC: 9
SourceForge : NetMage (project search) : .../netmage/effect/tests/games/lessTrivialGame.py

lessTrivialGame.py
    #   IN
    #   MM     Jeweled Torque
    #   PR     Silt Crawler
    #   Mirage Serpent
    #
Language: Python
LOC: 9
SourceForge : NetMage (project search) : .../netmage/io/tests/games/lessTrivialGame.py

anole-manage.py
from anole.core.crawlapp import CrawlApp as App
from anole.core.fetcher import HttpFetcher as Fetcher
from anole.core.crawler import  Crawler
from anole.core.processormgr import ProcessorMgr
from anole.core.dns import SimpleDns as Dns
Language: Python
LOC: 77
Google : anole-spider - a python spider (project search) : .../Google/a/anole-spider/trunk/anole-manage.py

ruya.py
#-*- coding: UTF-8 -*-
'''
U{Ruya<http://ruya.sourceforge.net/>} I{Arabic name meaning "sight, vision"} is a Python-based crawler for crawling English...  
B{It is targeted solely towards developers who want crawling functionality in their code}.
Some important features of this tool are-
Language: Python
LOC: 1171
Spider_20090529_inc : Ruya (project search) : .../0015/ruya/ruya-1.0.zip/ruya-1.0/ruya.py

crawler.py
# -- coding: latin-1
""" crawler.py - Module which does crawling and downloading
    of urls from the web. This module is part of HarvestMan program.
    Author: Anand B Pillai (abpillai at gmail dot com).
Language: Python
LOC: 495
BerliOS : HarvestMan (project search) : .../harvestman/harvestman/HarvestMan/HarvestMan/crawler.py

 1 2 3 4 5 6

Project Matches
 1 2 3 4 5 ...

mediaslave-crawler - A web crawler that can parse web pages for...

WWW__Bookmark__Crawler - WWW::Bookmark::Crawler

CrawlerWeb - Proiect crawler web facultate

wiki-crawler - This is a web crawler speci...'s - This is a web crawler specially designed to crawl...

Smart and Simple Web Crawler - Simple framework to implement crawling technology...








About Koders | Resources | Downloads | Support | Black Duck | Submit Project | Terms of Service | DMCA | Privacy Policy | Site Map | Contact Us