Package Halberd :: Package clues :: Module Clue
[hide private]
[frames] | no frames]

Source Code for Module Halberd.clues.Clue

  1  # -*- coding: iso-8859-1 -*- 
  2   
  3  """Clue generation module. 
  4   
  5  Clues are pieces of information obtained from the responses sent by a 
  6  webserver. 
  7  Their importance comes from the fact that they're the data structure we use to 
  8  detect real servers behind HTTP load balancer devices. 
  9  """ 
 10   
 11  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
 12  # 
 13  # This program is free software; you can redistribute it and/or modify 
 14  # it under the terms of the GNU General Public License as published by 
 15  # the Free Software Foundation; either version 2 of the License, or 
 16  # (at your option) any later version. 
 17  # 
 18  # This program is distributed in the hope that it will be useful, 
 19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 21  # GNU General Public License for more details. 
 22  # 
 23  # You should have received a copy of the GNU General Public License 
 24  # along with this program; if not, write to the Free Software 
 25  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 26   
 27   
 28  import time 
 29  import types 
 30  import rfc822 
 31  import hashlib 
 32   
 33  import Halberd.util 
 34   
 35   
class Clue:
    """A clue is what we use to tell apart real servers behind a virtual IP.

    Clues are gathered during several connections to a web server and they
    allow us to try to identify patterns in its responses. Those patterns
    could allow us to find out which real servers are behind a VIP.

    Two clues compare equal when both their clock difference (C{diff}) and
    their header digest (C{info['digest']}) match.
    """

    def __init__(self):
        """Create an empty clue that has been seen once."""
        # Number of times this clue has been found.
        self._count = 1

        # Generic server info (sometimes useful for distinguishing servers).
        self.info = {
            'server': '',
            'contloc': '',
            'cookies': [],
            'date': '',
            'digest': ''
        }

        # Local time and remote time (in seconds since the Epoch)
        self._local, self._remote = 0, 0

        # Clock difference; stays None until parse() has been called.
        self.diff = None

        # We store the headers we're interested in digesting in a string and
        # calculate its hash _after_ the header processing takes place. This
        # way we incur in less computational overhead.
        self.__tmphdrs = ''

        # Original MIME headers. They're useful during analysis and reporting.
        self.headers = None

    def parse(self, headers):
        """Extracts all relevant information from the MIME headers replied by
        the target.

        Every header is dispatched to the C{_get_<normalized name>} method
        when one exists; headers without a handler are appended verbatim to
        the text that gets hashed. Afterwards the digest and the clock
        difference are refreshed.

        @param headers: A set of MIME headers (a string as replied by the
        webserver or a previously parsed sequence of name, value tuples).
        @type headers: C{str}, C{list} or C{tuple}

        @raise TypeError: If headers is neither a string nor a sequence.
        """
        if isinstance(headers, basestring):
            # We parse the server's response into a sequence of name, value
            # tuples instead of a dictionary because with this approach we
            # keep the header's order as sent by the target. This is a
            # relevant piece of information we can't afford to miss.
            #
            # Lines without a colon (blank lines, a status line, garbage)
            # cannot be split into a (name, value) pair and would break the
            # unpacking in the loop below, so they are skipped here.
            self.headers = [tuple(line.split(':', 1))
                            for line in headers.splitlines()
                            if ':' in line]
        elif isinstance(headers, (list, tuple)):
            self.headers = headers
        else:
            raise TypeError('Unable to parse headers of type %s'
                            % type(headers).__name__)

        # We examine each MIME field and try to find an appropriate handler.
        # If there is none we simply digest the info it provides.
        self.__tmphdrs = ''
        for name, value in self.headers:
            # Only the handler lookup may legitimately fail; an
            # AttributeError raised *inside* a handler is a real error and
            # is allowed to propagate instead of being mistaken for a
            # missing handler.
            handlerfn = getattr(self, '_get_' + Clue.normalize(name), None)
            if handlerfn is None:
                self.__tmphdrs += '%s: %s ' % (name, value)
            else:
                handlerfn(value)

        self._updateDigest()
        self._calcDiff()

    @staticmethod
    def normalize(name):
        """Normalize string.

        This method takes a string coming out of mime-fields and transforms it
        into a valid Python identifier. That's done by removing invalid
        non-alphanumeric characters and also numeric ones placed at the
        beginning of the string.

        The character mapping itself comes from the translation table
        C{Halberd.util.table}, which is defined outside this module.

        @param name: String to be normalized.
        @type name: C{str}

        @return: Normalized string. It is empty when the name is empty or
        consists of digits only.
        @rtype: C{str}
        """
        normal = name.translate(Halberd.util.table).lower()
        # Guard against running off the end of an empty / all-digit name.
        while normal and normal[0].isdigit():
            normal = normal[1:]
        return normal

    def _updateDigest(self):
        """Updates header fingerprint.

        Stores the SHA-1 hex digest of the accumulated header text in
        C{info['digest']} and discards the accumulated text.
        """
        assert self.__tmphdrs is not None
        fingerprint = hashlib.sha1(self.__tmphdrs)
        self.__tmphdrs = None
        self.info['digest'] = fingerprint.hexdigest()

    def _calcDiff(self):
        """Compute the time difference between the local and remote clocks.

        The result (local minus remote, truncated to an integer number of
        seconds) is stored in C{self.diff}; nothing is returned.
        """
        self.diff = int(self._local - self._remote)

    def incCount(self, num=1):
        """Increase the times this clue has been found.

        @param num: A positive non-zero number of hits to increase.
        @type num: C{int}

        @raise ValueError: in case C{num} is less than or equal to zero.
        """
        if num <= 0:
            raise ValueError('num must be greater than zero, got %r' % (num,))
        self._count += num

    def getCount(self):
        """Retrieve the number of times the clue has been found.

        @return: Number of hits.
        @rtype: C{int}
        """
        return self._count

    def setTimestamp(self, timestamp):
        """Sets the local clock attribute.

        @param timestamp: The local time (expressed in seconds since the Epoch)
        when the connection to the target was successfully completed.
        @type timestamp: C{int}
        """
        self._local = timestamp

    def __eq__(self, other):
        """Clues are equal when their clock difference and digest match.

        Comparing against something that is not a C{Clue} yields
        C{NotImplemented}, so the interpreter falls back to its default
        comparison instead of failing on a missing attribute.
        """
        if not isinstance(other, Clue):
            return NotImplemented

        if self.diff != other.diff:
            return False

        if self.info['digest'] != other.info['digest']:
            return False

        return True

    def __ne__(self, other):
        """Negation of C{__eq__}."""
        return not self == other

    def __repr__(self):
        """Short form for clues without data, detailed form otherwise."""
        # A clue whose diff was never computed cannot be rendered with %d,
        # so it always gets the short form.
        if self.diff is None or not (self.diff or self.info['digest']):
            return "<Clue at %x>" % id(self)
        return "<Clue at %x diff=%d found=%d digest='%s'>" \
            % (id(self), self.diff, self._count,
               self.info['digest'][:4] + '...')

    # ==================================================================
    # The following methods extract relevant data from the MIME headers.
    # ==================================================================

    def _get_server(self, field):
        """Server:"""
        self.info['server'] = field
        self.__tmphdrs += field     # Make sure this gets hashed too.

    def _get_date(self, field):
        """Date:

        Records the raw header and, when it can be parsed, the remote time
        in seconds. An unparsable or out-of-range date leaves the remote
        time untouched, exactly as if the header had not been sent.
        """
        # rfc822 has been deprecated in favour of the email package.
        from email.utils import parsedate

        self.info['date'] = field

        parsed = parsedate(field)
        if parsed is None:
            # Malformed date sent by the server; nothing usable in it.
            return

        try:
            # The DST flag is pinned to 0 to match what rfc822.parsedate is
            # understood to have returned -- NOTE(review): confirm.
            # NOTE(review): time.mktime() interprets the tuple as local time
            # while HTTP dates are GMT; this only adds a constant offset.
            self._remote = time.mktime(parsed[:8] + (0,))
        except (ValueError, OverflowError):
            # Date outside the range supported by the platform's mktime().
            pass

    def _get_content_location(self, field):
        """Content-location:"""
        self.info['contloc'] = field
        self.__tmphdrs += field

    # NOTE(review): the listing this class was taken from skips original
    # source lines 213-215 at this point; a handler may have been elided
    # there -- confirm against the upstream file.

    # ====================================================
    # Ignored headers (they don't contribute to the hash).
    # ====================================================

    def _get_expires(self, field):
        """Expires:"""
        pass

    def _get_age(self, field):
        """Age:"""
        pass

    def _get_content_length(self, field):
        """Content-length:"""
        pass

    def _get_last_modified(self, field):
        """Last-modified:"""
        pass

    def _get_etag(self, field):
        """ETag:"""
        pass

    def _get_cache_expires(self, field):
        """Cache-expires:"""
        pass

    def _get_content_type(self, field):
        """Content-type:"""
        pass
248 249 250 # vim: ts=4 sw=4 et 251