最新消息:

写一个网站备份文件扫描的工具

代码编写 pang0lin 2972浏览 0评论

一直以来都在用御剑,但是用得多了,就越来越觉得它蠢。于是乎就自己写了一个。代码很少,只专门用于批量扫描网站上有没有备份文件。

# encoding: utf-8
# 备份文件扫描

import sys
import requests
import urllib
import threading
import Queue

class FileWorker(threading.Thread):
  """Worker thread that probes one site for backup archives.

  Each worker repeatedly takes a base name from the shared queue, combines
  it with every extension in ``file_ext`` and requests
  ``<url>/<name>.<ext>``.  URLs answered with HTTP 200 are appended to the
  module-level ``resources`` list, which must exist before the thread is
  started.
  """

  def __init__(self, queue, url, file_ext):
    """Remember the shared work queue, the base URL and the extensions."""
    threading.Thread.__init__(self)
    self.queue = queue
    self.url = url
    self.file_ext = file_ext

  def run(self):
    """Drain the queue, recording every candidate URL that returns 200."""
    while True:
      # Rely on get_nowait() alone: another worker may take the last item
      # between a separate empty() check and the get.
      try:
        uri = self.queue.get_nowait()
      except Queue.Empty:
        break
      try:
        name = str(uri).rstrip()
        if not name:
          # A blank dictionary entry would only yield "<url>/.<ext>".
          continue
        for ext in self.file_ext:
          temp_url = self.url + "/" + name + "." + ext
          if getCode(temp_url) == 200:
            resources.append(temp_url)
      except Exception as e:
        # Report the problem but keep processing the remaining entries
        # instead of terminating this worker.
        print(e)


def scanFile(url):
  """Scan ``url`` for downloadable backup archives.

  Candidate names come from two places: every line of ``uri1.txt``
  combined with each known archive extension (probed by 20 ``FileWorker``
  threads), and the site's own host name / last two host labels combined
  with the same extensions.

  Returns the list of URLs that answered with HTTP 200.  The list is also
  published as the module-level ``resources`` because the worker threads
  append to that global.  An empty list is returned without scanning when
  the site answers 200 for a file name that should not exist.
  """
  global resources

  # Detection relies purely on the HTTP status code.  If the site also
  # answers 200 for a nonsense file name, the code carries no information,
  # so skip this site.
  not_exists_file = url + '/437hsdf653ggfyrga8.zip'
  if getCode(not_exists_file) == 200:
    return []

  file_ext = ['rar', 'zip', 'tar.gz','gz','tgz','7z','z','bz2','tar.bz2','iso','cab']

  # Fill the work queue from the dictionary file; the with-block guarantees
  # the handle is closed, and blank lines are not queued.
  queue = Queue.Queue()
  with open('uri1.txt', 'r') as uriList:
    for line in uriList:
      line = line.rstrip()
      if line:
        queue.put(line)

  # The workers append to the global list, so it must exist before start().
  resources = []
  threads = [FileWorker(queue, url, file_ext) for _ in range(20)]
  for t in threads:
    t.start()
  # Wait for every worker to finish before continuing in the main thread.
  for t in threads:
    t.join()

  # Some sites name the backup after their domain, e.g. "<domain>.rar".
  proto, rest = urllib.splittype(url)
  res, rest = urllib.splithost(rest)
  if res:
    res_host = res.split(":")[0]  # drop an optional ":port"
    res_root_host = ".".join(res_host.split(".")[-2:])
    # When the host already is its two-label root, probe it only once so
    # the same URL is neither requested nor reported twice.
    host_names = [res_host]
    if res_root_host != res_host:
      host_names.append(res_root_host)
    for ext in file_ext:
      for name in host_names:
        temp_url = url + "/" + name + "." + ext
        if getCode(temp_url) == 200:
          resources.append(temp_url)
  return resources

def fileIsLive(url):
  """Return True when requesting ``url`` yields HTTP status 200, else False."""
  return getCode(url) == 200

def getCode(url, timeout=10):
  """Return the HTTP status code of a GET request for ``url``.

  Redirects are not followed.  ``timeout`` is the number of seconds passed
  to ``requests.get``; without it a stalled server would block the calling
  thread indefinitely.  Any request failure is reported as status code 0.
  """
  try:
    # stream=True defers the body download: only the status line is needed,
    # and a real backup archive may be very large.
    response = requests.get(url, allow_redirects=False, stream=True,
                            timeout=timeout)
  except Exception:
    # Best effort: a failed request counts as "not found".  Unlike a bare
    # except, this lets KeyboardInterrupt / SystemExit propagate.
    return 0
  try:
    return response.status_code
  finally:
    # Release the connection, since the body is never read.
    response.close()

if __name__ == "__main__":
  # Scan every host listed in hosts.txt (one per line) and print the backup
  # URLs found for each.  The with-block closes the file when done.
  with open("hosts.txt", "r") as hosts:
    for host in hosts:
      host = host.strip()
      if not host:
        # A blank line would otherwise trigger a scan of "http://".
        continue
      print("starting scan:"+host)
      urls = scanFile("http://%s" % (host))
      print(urls)

再顺便给出一个我整理的、比较简洁的备份文件名字典 uri1

 

转载请注明:我是穿山甲,小弟穿山乙 » 写一个网站备份文件扫描的工具

发表我的评论
取消评论
表情

Hi,您需要填写昵称和邮箱!

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址