1
2
"""Clue generation module.

Clues are pieces of information obtained from the responses sent by a
webserver.
Their importance comes from the fact that they're the data structure we use
to detect real servers behind HTTP load balancer devices.
"""
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 import time
29 import types
30 import rfc822
31 import hashlib
32
33 import Halberd.util
34
35
"""A clue is what we use to tell apart the real servers behind a virtual
IP (VIP).

Clues are gathered during several connections to a web server and they
allow us to try to identify patterns in its responses. Those patterns could
allow us to find out which real servers are behind a VIP.
"""
44
45 self._count = 1
46
47
48 self.info = {
49 'server': '',
50 'contloc': '',
51 'cookies': [],
52 'date': '',
53 'digest': ''
54 }
55
56
57 self._local, self._remote = 0, 0
58
59 self.diff = None
60
61
62
63
64 self.__tmphdrs = ''
65
66
67 self.headers = None
68
69
def parse(self, headers):
    """Extracts all relevant information from the MIME headers replied by
    the target.

    Every header is offered to the C{_get_<normalized name>} method of this
    object, when such a method exists. Headers without a dedicated method
    are accumulated as text, and that text is what L{_updateDigest} turns
    into the digest. L{_calcDiff} is called last to refresh the clock
    difference.

    @param headers: A set of MIME headers (a string as replied by the
    webserver or a previously parsed sequence of name, value tuples).
    @type headers: C{str}, C{list} or C{tuple}

    @raise TypeError: If headers is neither a string nor a sequence.
    """
    if isinstance(headers, basestring):
        # Raw reply: one "Name: value" pair per line. Split on the first
        # colon only, since values (dates, URLs...) may contain colons as
        # well; the value keeps whatever whitespace followed the colon.
        # Lines without a colon (blank lines included) cannot be unpacked
        # into a (name, value) pair below, so they are left out.
        self.headers = [tuple(line.split(':', 1))
                        for line in headers.splitlines() if ':' in line]
    elif isinstance(headers, (list, tuple)):
        # Already parsed by the caller; tuples are accepted as documented.
        self.headers = headers
    else:
        raise TypeError('Unable to parse headers of type %s'
                        % type(headers).__name__)

    # Start from an empty buffer: _updateDigest() leaves it set to None.
    self.__tmphdrs = ''
    for name, value in self.headers:
        try:
            handlerfn = getattr(self, '_get_' + Clue.normalize(name))
        except AttributeError:
            # No dedicated handler: the header goes verbatim into the text
            # that will be hashed.
            self.__tmphdrs += '%s: %s ' % (name, value)
        else:
            # Called outside the try body so that an AttributeError raised
            # by the handler itself is not mistaken for a missing handler.
            handlerfn(value)

    self._updateDigest()
    self._calcDiff()
105
@staticmethod
def normalize(name):
    """Normalize string.

    This method takes a string coming out of mime-fields and transforms it
    into something usable as part of a Python identifier. The string is
    first translated through C{Halberd.util.table} (the character mapping
    is defined there, not here) and lowercased, then any digits placed at
    the beginning of the result are removed.

    @param name: String to be normalized.
    @type name: C{str}

    @return: Normalized string. It is empty when nothing is left once the
    leading digits are removed.
    @rtype: C{str}
    """
    normal = name.translate(Halberd.util.table).lower()
    # Walk past the leading digits by index instead of testing normal[0]:
    # an empty or all-digit string then yields '' rather than IndexError.
    start = 0
    while start < len(normal) and normal[start].isdigit():
        start += 1
    return normal[start:]
126
128 """Updates header fingerprint.
129 """
130 assert self.__tmphdrs != None
131 fingerprint = hashlib.sha1(self.__tmphdrs)
132 self.__tmphdrs = None
133 self.info['digest'] = fingerprint.hexdigest()
134
136 """Compute the time difference between the remote and local clocks.
137
138 @return: Time difference.
139 @rtype: C{int}
140 """
141 self.diff = int(self._local - self._remote)
142
143
145 """Increase the times this clue has been found.
146
147 @param num: A positive non-zero number of hits to increase.
148 @type num: C{int}
149
150 @raise ValueError: in case L{num} is less than or equal to zero.
151 """
152 if num <= 0:
153 raise ValueError
154 self._count += num
155
157 """Retrieve the number of times the clue has been found
158
159 @return: Number of hits.
160 @rtype: C{int}.
161 """
162 return self._count
163
164
166 """Sets the local clock attribute.
167
168 @param timestamp: The local time (expressed in seconds since the Epoch)
169 when the connection to the target was successfully completed.
170 @type timestamp: C{int}
171 """
172 self._local = timestamp
173
174
176 if self.diff != other.diff:
177 return False
178
179 if self.info['digest'] != other.info['digest']:
180 return False
181
182 return True
183
185 return not self == other
186
188 if not (self.diff or self.info['digest']):
189 return "<Clue at %x>" % id(self)
190 return "<Clue at %x diff=%d found=%d digest='%s'>" \
191 % (id(self), self.diff, self._count,
192 self.info['digest'][:4] + '...')
193
194
195
196
197
199 """Server:"""
200 self.info['server'] = field
201 self.__tmphdrs += field
202
204 """Date:"""
205 self.info['date'] = field
206 self._remote = time.mktime(rfc822.parsedate(field))
207
208 - def _get_content_location(self, field):
209 """Content-location:"""
210 self.info['contloc'] = field
211 self.__tmphdrs += field
212
214 """Set-cookie:"""
215 self.info['cookies'].append(field)
216
217
218
219
220
222 """Expires:"""
223 pass
224
228
229 - def _get_content_length(self, field):
230 """Content-length:"""
231 pass
232
234 """Last-modified:"""
235 pass
236
240
242 """Cache-expires:"""
243 pass
244
245 - def _get_content_type(self, field):
246 """Content-type:"""
247 pass
248
249
250
251