Package Halberd :: Package clues :: Module file
[hide private]
[frames | no frames]

Source Code for Module Halberd.clues.file

  1  # -*- coding: iso-8859-1 -*- 
  2   
  3  """Utilities for clue storage. 
  4   
  5  Provides functionality needed to store clues on disk. 
  6  """ 
  7   
  8  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
  9  # 
 10  # This program is free software; you can redistribute it and/or modify 
 11  # it under the terms of the GNU General Public License as published by 
 12  # the Free Software Foundation; either version 2 of the License, or 
 13  # (at your option) any later version. 
 14  # 
 15  # This program is distributed in the hope that it will be useful, 
 16  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 17  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 18  # GNU General Public License for more details. 
 19  # 
 20  # You should have received a copy of the GNU General Public License 
 21  # along with this program; if not, write to the Free Software 
 22  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 23   
 24   
 25  import os 
 26  import csv 
 27  import types 
 28  import shutil 
 29   
 30  import Halberd.util 
 31  from Halberd.clues.Clue import Clue 
 32   
 33   
class InvalidFile(Exception):
    """The loaded file is not a valid clue file.

    @ivar msg: Human-readable description of the problem.
    @type msg: C{str}
    """

    def __init__(self, msg):
        """Initializes the exception.

        @param msg: Description of why the file was rejected.
        @type msg: C{str}
        """
        # Hand the message to the base class as well, so that C{args},
        # C{repr()} and pickling carry it instead of seeing an empty
        # exception.  The explicit base-class call (rather than super())
        # matches the Python 2 style used throughout this module.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        """Returns the message given at construction time."""
        return self.msg
42 43
def save(filename, clues):
    """Save clues to a file.

    Each clue is written as one CSV row holding its count, its local
    timestamp and the textual representation of its headers.

    @param filename: Name of the file where the clues will be written to.
    @type filename: C{str}

    @param clues: Sequence of clues to write.
    @type clues: C{list}

    @raise IOError: If the file can't be opened or written.
    """
    # Create or truncate the destination file.
    cluefp = open(filename, 'w+')
    try:
        writer = csv.writer(cluefp)

        for clue in clues:
            # Store the most relevant clue information.
            writer.writerow((clue.getCount(), clue._local, clue.headers))
    finally:
        # Release the handle even when a clue accessor or a write fails
        # half-way through; otherwise the descriptor would leak.
        cluefp.close()
62 63
def load(filename):
    """Load clues from file.

    Every row of the CSV file must hold exactly three fields: the clue
    count, its local timestamp and the textual representation of its
    headers.

    @param filename: Name of the files where the clues are stored.
    @type filename: C{str}

    @return: Clues extracted from the file.
    @rtype: C{list}

    @raise InvalidFile: In case there's a problem while reinterpreting the
    clues.
    @raise IOError: If the file can't be opened or read.
    """
    cluefp = open(filename, 'r')
    try:
        reader = csv.reader(cluefp)

        clues = []
        for tup in reader:
            try:
                count, localtime, headers = tup
            except ValueError:
                raise InvalidFile('Cannot unpack fields')

            # Recreate the current clue.
            clue = Clue()
            try:
                clue._count = int(count)
                clue._local = float(localtime)
            except ValueError:
                raise InvalidFile('Could not convert fields')

            # SECURITY: eval() executes whatever expression the file
            # contains.  The empty globals/locals dictionaries do NOT
            # sandbox it (the builtins remain reachable), so only clue
            # files coming from a trusted source should be loaded.  A
            # literal-only parser (e.g. ast.literal_eval) would be the
            # safer choice where it is available.
            try:
                clue.headers = eval(headers, {}, {})
            except Exception:
                # A header field that is not a valid expression is a
                # malformed clue file, which is what InvalidFile is for.
                raise InvalidFile('Wrong clue header field')

            if not isinstance(clue.headers, (list, tuple)):
                raise InvalidFile('Wrong clue header field')
            clue.parse(clue.headers)

            clues.append(clue)
    finally:
        # Close the file on every exit path, including the InvalidFile
        # ones raised from inside the loop.
        cluefp.close()

    return clues
105 106
class ClueDir:
    """Stores clues hierarchically using the underlying filesystem.

    ClueDir tries to be as portable as possible but requires the host operating
    system to be able to create long filenames (and directories, of course).

    This is an example layout::

        http___www_microsoft_com/
        http___www_microsoft_com/207_46_134_221.clu
        http___www_microsoft_com/207_46_156_220.clu
        http___www_microsoft_com/207_46_156_252.clu
        .
        .
        .
    """

    def __init__(self, root=None):
        """Initializes ClueDir object.

        @param root: Root folder where to start creating sub-folders.  When
        omitted (or empty) the current working directory is used.
        @type root: C{str}

        @raise OSError: If the root folder can't be created.
        @raise InvalidFile: If C{root} exists but is not a directory.
        """
        self.ext = 'clu'
        if not root:
            self.root = os.getcwd()
        else:
            self.root = root
            self._mkdir(self.root)

    def _sanitize(self, url):
        """Filter out potentially dangerous chars.

        The replacement is driven by the translation table provided by
        C{Halberd.util}.
        """
        return url.translate(Halberd.util.table)

    def _mkdir(self, dest):
        """Creates a directory to store clues.

        If the directory already exists it won't complain about that.

        @param dest: Path of the directory to create.
        @type dest: C{str}

        @return: The same path that was passed in.
        @rtype: C{str}

        @raise InvalidFile: If C{dest} exists but is not a directory.
        @raise OSError: If the directory can't be created.
        """
        # Attempt the creation first and inspect the path only on failure:
        # checking beforehand would leave a window in which another process
        # could create the directory between the check and the mkdir call.
        try:
            os.mkdir(dest)
        except OSError:
            if os.path.isdir(dest):
                # Somebody (maybe us, earlier) already created it.
                return dest
            if os.path.exists(dest):
                raise InvalidFile(
                    '%s already exist and is not a directory' % dest)
            # Genuine failure (missing parent, permissions, ...).
            raise

        return dest

    def save(self, url, addr, clues):
        """Hierarchically write clues.

        The clues end up in C{<root>/<sanitized url>/<sanitized addr>.clu}.

        @param url: URL scanned (will be used as a directory name).
        @type url: C{str}

        @param addr: Address of the target.
        @type addr: C{str}

        @param clues: Clues to be stored.
        @type clues: C{list}

        @raise OSError: If the directories can't be created.
        @raise IOError: If the file can't be stored successfully.
        """
        assert url and addr

        urldir = self._mkdir(os.path.join(self.root, self._sanitize(url)))
        filename = self._sanitize(addr) + os.extsep + self.ext
        cluefile = os.path.join(urldir, filename)

        # The module-level save() is reached through its fully qualified
        # name, exactly as the rest of the package refers to it.
        Halberd.clues.file.save(cluefile, clues)
179 180 181 # vim: ts=4 sw=4 et 182