/[gentoo-src]/portage/pym/getbinpkg.py

Contents of /portage/pym/getbinpkg.py

Revision 1.15
Sat Feb 26 06:35:20 2005 UTC by jstubbs
Branch: MAIN
CVS Tags: HEAD
Branch point for: portage_2_1
Changes since 1.14: +2 -1 lines
File MIME type: text/x-python
Brought forward changes from portage_2_0

# getbinpkg.py -- Portage binary-package helper functions
# Copyright 2003-2004 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
# $Header: /var/cvsroot/gentoo-src/portage/pym/getbinpkg.py,v 1.14 2004/10/29 14:45:35 jstubbs Exp $
cvs_id_string="$Id: getbinpkg.py,v 1.12.2.3 2005/01/16 02:35:33 carpaski Exp $"[5:-2]

from output import *
import htmllib,HTMLParser,string,formatter,sys,os,xpak,time,tempfile,cPickle,base64

try:
    import ftplib
except SystemExit, e:
    raise
except Exception, e:
    sys.stderr.write(red("!!! CANNOT IMPORT FTPLIB: ")+str(e)+"\n")

try:
    import httplib
except SystemExit, e:
    raise
except Exception, e:
    sys.stderr.write(red("!!! CANNOT IMPORT HTTPLIB: ")+str(e)+"\n")

def make_metadata_dict(data):
    myid,myglob = data

    mydict = {}
    for x in xpak.getindex_mem(myid):
        mydict[x] = xpak.getitem(data,x)

    return mydict

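# A minimal usage sketch for make_metadata_dict(): its argument is the
# (index, data) pair produced by xpak.xsplit_mem(), e.g. as returned by
# file_get_metadata() below. The URL is hypothetical and key names such
# as "CATEGORY" and "SLOT" are typical xpak entries, shown only for
# illustration.
#
#   myid = file_get_metadata("http://mirror.example.org/All/foo-1.0.tbz2")
#   if myid[0]:
#       mydict = make_metadata_dict(myid)
#       print mydict.get("CATEGORY"), mydict.get("SLOT")
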
class ParseLinks(HTMLParser.HTMLParser):
    """Parser class that overrides HTMLParser to grab all anchors from an html
    page and provide suffix and prefix limiters"""
    def __init__(self):
        self.PL_anchors = []
        HTMLParser.HTMLParser.__init__(self)

    def get_anchors(self):
        return self.PL_anchors

    def get_anchors_by_prefix(self,prefix):
        newlist = []
        for x in self.PL_anchors:
            if (len(x) >= len(prefix)) and (x[:len(prefix)] == prefix):
                if x not in newlist:
                    newlist.append(x[:])
        return newlist

    def get_anchors_by_suffix(self,suffix):
        newlist = []
        for x in self.PL_anchors:
            if (len(x) >= len(suffix)) and (x[-len(suffix):] == suffix):
                if x not in newlist:
                    newlist.append(x[:])
        return newlist

    def handle_endtag(self,tag):
        pass

    def handle_starttag(self,tag,attrs):
        if tag == "a":
            for x in attrs:
                if x[0] == 'href':
                    if x[1] not in self.PL_anchors:
                        self.PL_anchors.append(x[1])

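# Example: pulling anchors out of a fetched directory index. A minimal
# sketch; the HTML snippet is made up for illustration.
#
#   parser = ParseLinks()
#   parser.feed('<a href="foo-1.0.tbz2">foo</a> <a href="../">up</a>')
#   print parser.get_anchors()                    # ['foo-1.0.tbz2', '../']
#   print parser.get_anchors_by_suffix(".tbz2")   # ['foo-1.0.tbz2']
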
def create_conn(baseurl,conn=None):
    """(baseurl,conn) --- Takes a protocol://site:port/address url, and an
    optional connection. If connection is already active, it is passed on.
    baseurl is reduced to address and is returned in tuple (conn,address)"""
    parts = string.split(baseurl, "://", 1)
    if len(parts) != 2:
        raise ValueError, "Provided URL does not contain protocol identifier. '%s'" % baseurl
    protocol,url_parts = parts
    del parts
    host,address = string.split(url_parts, "/", 1)
    del url_parts
    address = "/"+address

    userpass_host = string.split(host, "@", 1)
    if len(userpass_host) == 1:
        host = userpass_host[0]
        userpass = ["anonymous"]
    else:
        host = userpass_host[1]
        userpass = string.split(userpass_host[0], ":")
    del userpass_host

    if len(userpass) > 2:
        raise ValueError, "Unable to interpret username/password provided."
    elif len(userpass) == 2:
        username = userpass[0]
        password = userpass[1]
    elif len(userpass) == 1:
        username = userpass[0]
        password = None
    del userpass

    http_headers = {}
    http_params = {}
    if username and password:
        http_headers = {
            "Authorization": "Basic %s" %
                string.replace(
                    base64.encodestring("%s:%s" % (username, password)),
                    "\012",
                    ""
                ),
        }

    if not conn:
        if protocol == "https":
            conn = httplib.HTTPSConnection(host)
        elif protocol == "http":
            conn = httplib.HTTPConnection(host)
        elif protocol == "ftp":
            passive = 1
            if host[-1] == "*":
                passive = 0
                host = host[:-1]
            conn = ftplib.FTP(host)
            if password:
                conn.login(username,password)
            else:
                sys.stderr.write(yellow(" * No password provided for username")+" '"+str(username)+"'\n\n")
                conn.login(username)
            conn.set_pasv(passive)
            conn.set_debuglevel(0)
        else:
            raise NotImplementedError, "%s is not a supported protocol." % protocol

    return (conn,protocol,address, http_params, http_headers)

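# Usage sketch for create_conn(); the mirror URL and credentials are
# hypothetical. Note that a trailing "*" on an FTP host selects active
# (non-passive) mode.
#
#   conn,protocol,address,params,headers = create_conn(
#       "http://user:pass@mirror.example.org/packages/All/")
#   # protocol == "http", address == "/packages/All/", and headers now
#   # carries the Basic auth credentials. Reuse conn for later calls:
#   conn2 = create_conn("http://mirror.example.org/other/path", conn)[0]
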
def make_ftp_request(conn, address, rest=None, dest=None):
    """(conn,address,rest) --- uses the conn object to request the data
    from address, issuing a REST (restart) command if rest is passed."""
    try:

        if dest:
            fstart_pos = dest.tell()

        conn.voidcmd("TYPE I")
        fsize = conn.size(address)

        if (rest != None) and (rest < 0):
            rest = fsize+int(rest)
            if rest < 0:
                rest = 0

        if rest != None:
            mysocket = conn.transfercmd("RETR "+str(address), rest)
        else:
            mysocket = conn.transfercmd("RETR "+str(address))

        mydata = ""
        while 1:
            somedata = mysocket.recv(8192)
            if somedata:
                if dest:
                    dest.write(somedata)
                else:
                    mydata = mydata + somedata
            else:
                break

        if dest:
            data_size = dest.tell() - fstart_pos
        else:
            data_size = len(mydata)

        mysocket.close()
        conn.voidresp()
        conn.voidcmd("TYPE A")

        return mydata,not (fsize==data_size),""

    except ValueError, e:
        return None,int(str(e)[:4]),str(e)

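# Sketch: fetching the last 3000 bytes of a file over an existing FTP
# connection (a negative rest offset counts back from the end of the
# file). Host and path are hypothetical. The second return value flags
# that less than the whole file was read.
#
#   conn,protocol,address,params,headers = \
#       create_conn("ftp://anonymous@mirror.example.org/All/foo-1.0.tbz2")
#   data,short,msg = make_ftp_request(conn, address, rest=-3000)
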
def make_http_request(conn, address, params={}, headers={}, dest=None):
    """(conn,address,params,headers) --- uses the conn object to request
    the data from address, performing Location forwarding and using the
    optional params and headers."""

    rc = 0
    response = None
    while (rc == 0) or (rc == 301) or (rc == 302):
        try:
            if (rc != 0):
                conn,ignore,ignore,ignore,ignore = create_conn(address)
            conn.request("GET", address, params, headers)
        except SystemExit, e:
            raise
        except Exception, e:
            return None,None,"Server request failed: "+str(e)
        response = conn.getresponse()
        rc = response.status

        # 301 (moved permanently) and 302 (moved temporarily) both carry
        # the new address in the Location header; follow it.
        if ((rc == 301) or (rc == 302)):
            ignored_data = response.read()
            del ignored_data
            for x in string.split(str(response.msg), "\n"):
                parts = string.split(x, ": ", 1)
                if parts[0] == "Location":
                    if (rc == 301):
                        sys.stderr.write(red("Location has moved: ")+str(parts[1])+"\n")
                    if (rc == 302):
                        sys.stderr.write(red("Location has temporarily moved: ")+str(parts[1])+"\n")
                    address = parts[1]
                    break

    if (rc != 200) and (rc != 206):
        sys.stderr.write(str(response.msg)+"\n")
        sys.stderr.write(response.read()+"\n")
        sys.stderr.write("address: "+address+"\n")
        return None,rc,"Server did not respond successfully ("+str(response.status)+": "+str(response.reason)+")"

    if dest:
        dest.write(response.read())
        return "",0,""

    return response.read(),0,""

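# Sketch: a ranged GET over an existing HTTP connection; 301/302
# redirects are followed automatically. Host and path are hypothetical.
#
#   conn,protocol,address,params,headers = \
#       create_conn("http://mirror.example.org/All/foo-1.0.tbz2")
#   headers["Range"] = "bytes=-3000"
#   data,rc,msg = make_http_request(conn, address, params, headers)
#   if data is None:
#       sys.stderr.write("fetch failed: "+str(msg)+"\n")
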
def match_in_array(array, prefix="", suffix="", match_both=1, allow_overlap=0):
    myarray = []

    if not (prefix and suffix):
        match_both = 0

    for x in array:
        add_p = 0
        if prefix and (len(x) >= len(prefix)) and (x[:len(prefix)] == prefix):
            add_p = 1

        if match_both:
            if prefix and not add_p: # Require both, but don't have the first one.
                continue
        else:
            if add_p: # Only need one, and we have it.
                myarray.append(x[:])
                continue

        if not allow_overlap: # Don't allow the prefix and suffix to overlap.
            if len(x) >= (len(prefix)+len(suffix)):
                y = x[len(prefix):]
            else:
                continue # Too short to match.
        else:
            y = x # Do whatever... We're overlapping.

        if suffix and (len(y) >= len(suffix)) and (y[-len(suffix):] == suffix):
            myarray.append(x) # It matches.
        else:
            continue # Doesn't match.

    return myarray

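# match_in_array() filters filename lists; with both prefix and suffix
# given (match_both), the two must match without overlapping. A small
# sketch with made-up filenames:
#
#   files = ["metadata.idx", "metadata.idx.gz", "foo-1.0.tbz2"]
#   match_in_array(files, prefix="metadata.idx")  # both metadata files
#   match_in_array(files, suffix=".tbz2")         # ['foo-1.0.tbz2']
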
def dir_get_list(baseurl,conn=None):
    """(baseurl[,connection]) -- Takes a base url to connect to and read from.
    URL should be in the form <proto>://<site>[:port]<path>
    Connection is used for persistent connection instances."""

    if not conn:
        keepconnection = 0
    else:
        keepconnection = 1

    conn,protocol,address,params,headers = create_conn(baseurl, conn)

    listing = None
    if protocol in ["http","https"]:
        page,rc,msg = make_http_request(conn,address,params,headers)

        if page:
            parser = ParseLinks()
            parser.feed(page)
            del page
            listing = parser.get_anchors()
        else:
            raise Exception, "Unable to get listing: %s %s" % (rc,msg)
    elif protocol in ["ftp"]:
        if address[-1] == '/':
            olddir = conn.pwd()
            conn.cwd(address)
            listing = conn.nlst()
            conn.cwd(olddir)
            del olddir
        else:
            listing = conn.nlst(address)
    else:
        raise TypeError, "Unknown protocol. '%s'" % protocol

    if not keepconnection:
        conn.close()

    return listing

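# Usage sketch (hypothetical mirror): list a remote package directory
# and keep only the binary packages.
#
#   filelist = dir_get_list("http://mirror.example.org/packages/All/")
#   tbz2s = match_in_array(filelist, suffix=".tbz2")
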
def file_get_metadata(baseurl,conn=None, chunk_size=3000):
    """(baseurl[,connection]) -- Takes a base url to connect to and read from.
    URL should be in the form <proto>://<site>[:port]<path>
    Connection is used for persistent connection instances."""

    if not conn:
        keepconnection = 0
    else:
        keepconnection = 1

    conn,protocol,address,params,headers = create_conn(baseurl, conn)

    if protocol in ["http","https"]:
        headers["Range"] = "bytes=-"+str(chunk_size)
        data,rc,msg = make_http_request(conn, address, params, headers)
    elif protocol in ["ftp"]:
        data,rc,msg = make_ftp_request(conn, address, -chunk_size)
    else:
        raise TypeError, "Unknown protocol. '%s'" % protocol

    if data:
        xpaksize = xpak.decodeint(data[-8:-4])
        if (xpaksize+8) > chunk_size:
            myid = file_get_metadata(baseurl, conn, (xpaksize+8))
            if not keepconnection:
                conn.close()
            return myid
        else:
            xpak_data = data[len(data)-(xpaksize+8):-8]
        del data

        myid = xpak.xsplit_mem(xpak_data)
        if not myid:
            myid = None,None
        del xpak_data
    else:
        myid = None,None

    if not keepconnection:
        conn.close()

    return myid

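# Usage sketch (hypothetical URL): grab just the trailing xpak segment
# of a remote tbz2. If the segment turns out to be larger than
# chunk_size, the function calls itself again with the exact size.
#
#   myid = file_get_metadata("ftp://mirror.example.org/All/foo-1.0.tbz2",
#                            chunk_size=3000)
#   if myid[0]:
#       print make_metadata_dict(myid)
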
def file_get(baseurl,dest,conn=None,fcmd=None):
    """(baseurl,dest,fcmd=) -- Takes a base url to connect to and read from.
    URL should be in the form <proto>://[user[:pass]@]<site>[:port]<path>"""

    if not fcmd:
        return file_get_lib(baseurl,dest,conn)

    fcmd = string.replace(fcmd, "${DISTDIR}", dest)
    fcmd = string.replace(fcmd, "${URI}", baseurl)
    fcmd = string.replace(fcmd, "${FILE}", os.path.basename(baseurl))
    mysplit = string.split(fcmd)
    mycmd = mysplit[0]
    myargs = [os.path.basename(mycmd)]+mysplit[1:]
    mypid=os.fork()
    if mypid == 0:
        os.execv(mycmd,myargs)
        sys.stderr.write("!!! Failed to spawn fetcher.\n")
        sys.exit(1)
    retval=os.waitpid(mypid,0)[1]
    if (retval & 0xff) == 0:
        retval = retval >> 8
    else:
        sys.stderr.write("Spawned process caught a signal.\n")
        sys.exit(1)
    if retval != 0:
        sys.stderr.write("Fetcher exited with a failure condition.\n")
        return 0
    return 1

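# Usage sketch: with fcmd set, fetching is delegated to an external
# command via FETCHCOMMAND-style substitution (dest is then a directory
# path); without it, the built-in file_get_lib() below is used with
# dest as an open file. The URL and command are illustrative only.
#
#   fcmd = "/usr/bin/wget -O ${DISTDIR}/${FILE} ${URI}"
#   ok = file_get("http://mirror.example.org/All/foo-1.0.tbz2",
#                 "/usr/portage/packages/All", fcmd=fcmd)
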
def file_get_lib(baseurl,dest,conn=None):
    """(baseurl[,connection]) -- Takes a base url to connect to and read from.
    URL should be in the form <proto>://<site>[:port]<path>
    Connection is used for persistent connection instances."""

    if not conn:
        keepconnection = 0
    else:
        keepconnection = 1

    conn,protocol,address,params,headers = create_conn(baseurl, conn)

    sys.stderr.write("Fetching '"+str(os.path.basename(address))+"'\n")
    if protocol in ["http","https"]:
        data,rc,msg = make_http_request(conn, address, params, headers, dest=dest)
    elif protocol in ["ftp"]:
        data,rc,msg = make_ftp_request(conn, address, dest=dest)
    else:
        raise TypeError, "Unknown protocol. '%s'" % protocol

    if not keepconnection:
        conn.close()

    return rc

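# Usage sketch: dest is an open, writable file object, and the return
# value is the transport status (0 on a clean fetch). URL hypothetical.
#
#   out = open("/tmp/foo-1.0.tbz2", "w")
#   rc = file_get_lib("http://mirror.example.org/All/foo-1.0.tbz2", out)
#   out.close()
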
def dir_get_metadata(baseurl, conn=None, chunk_size=3000, verbose=1, usingcache=1, makepickle=None):
    """(baseurl,conn,chunk_size,verbose) -- Fetches the metadata for every
    .tbz2 in the remote directory at baseurl, using (and refreshing) the
    local pickled cache where possible. Returns the metadata dictionary
    for baseurl."""
    if not conn:
        keepconnection = 0
    else:
        keepconnection = 1

    if makepickle == None:
        makepickle = "/var/cache/edb/metadata.idx.most_recent"

    conn,protocol,address,params,headers = create_conn(baseurl, conn)

    filedict = {}

    try:
        metadatafile = open("/var/cache/edb/remote_metadata.pickle")
        metadata = cPickle.load(metadatafile)
        sys.stderr.write("Loaded metadata pickle.\n")
        metadatafile.close()
    except SystemExit, e:
        raise
    except:
        metadata = {}
    if not metadata.has_key(baseurl):
        metadata[baseurl]={}
    if not metadata[baseurl].has_key("indexname"):
        metadata[baseurl]["indexname"]=""
    if not metadata[baseurl].has_key("timestamp"):
        metadata[baseurl]["timestamp"]=0
    if not metadata[baseurl].has_key("modified"):
        metadata[baseurl]["modified"]=0
    if not metadata[baseurl].has_key("data"):
        metadata[baseurl]["data"]={}

    filelist = dir_get_list(baseurl, conn)
    tbz2list = match_in_array(filelist, suffix=".tbz2")
    metalist = match_in_array(filelist, prefix="metadata.idx")
    del filelist

    # Determine if our metadata file is current.
    metalist.sort()
    metalist.reverse() # makes the order new-to-old.
    havecache=0
    for mfile in metalist:
        if usingcache and \
           ((metadata[baseurl]["indexname"] != mfile) or \
            (metadata[baseurl]["timestamp"] < int(time.time()-(60*60*24)))):
            # Try to download a new cache until we succeed on one.
            data=""
            for trynum in [1,2,3]:
                mytempfile = tempfile.TemporaryFile()
                try:
                    file_get(baseurl+"/"+mfile, mytempfile, conn)
                    if mytempfile.tell() > len(data):
                        mytempfile.seek(0)
                        data = mytempfile.read()
                except ValueError, e:
                    sys.stderr.write("--- "+str(e)+"\n")
                    if trynum < 3:
                        sys.stderr.write("Retrying...\n")
                    mytempfile.close()
                    continue
                if match_in_array([mfile],suffix=".gz"):
                    sys.stderr.write("gzip'd\n")
                    try:
                        import gzip
                        mytempfile.seek(0)
                        gzindex = gzip.GzipFile(mfile[:-3],'rb',9,mytempfile)
                        data = gzindex.read()
                    except SystemExit, e:
                        raise
                    except Exception, e:
                        mytempfile.close()
                        sys.stderr.write("!!! Failed to use gzip: "+str(e)+"\n")
                mytempfile.close()
                try:
                    metadata[baseurl]["data"] = cPickle.loads(data)
                    del data
                    metadata[baseurl]["indexname"] = mfile
                    metadata[baseurl]["timestamp"] = int(time.time())
                    metadata[baseurl]["modified"] = 0 # It's not, right after download.
                    sys.stderr.write("Pickle loaded.\n")
                    break
                except SystemExit, e:
                    raise
                except Exception, e:
                    sys.stderr.write("!!! Failed to read data from index: "+str(mfile)+"\n")
                    sys.stderr.write("!!! "+str(e)+"\n")
            try:
                metadatafile = open("/var/cache/edb/remote_metadata.pickle", "w+")
                cPickle.dump(metadata,metadatafile)
                metadatafile.close()
            except SystemExit, e:
                raise
            except Exception, e:
                sys.stderr.write("!!! Failed to write binary metadata to disk!\n")
                sys.stderr.write("!!! "+str(e)+"\n")
        break
    # We may have metadata... now we run through the tbz2 list and check.
    sys.stderr.write(yellow("cache miss: 'x'")+" --- "+green("cache hit: 'o'")+"\n")
    for x in tbz2list:
        x = os.path.basename(x)
        if not metadata[baseurl]["data"].has_key(x):
            sys.stderr.write(yellow("x"))
            metadata[baseurl]["modified"] = 1
            myid = file_get_metadata(baseurl+"/"+x, conn, chunk_size)

            if myid[0]:
                metadata[baseurl]["data"][x] = make_metadata_dict(myid)
            elif verbose:
                sys.stderr.write(red("!!! Failed to retrieve metadata on: ")+str(x)+"\n")
        else:
            sys.stderr.write(green("o"))
    sys.stderr.write("\n")

    try:
        if metadata[baseurl].has_key("modified") and metadata[baseurl]["modified"]:
            metadata[baseurl]["timestamp"] = int(time.time())
            metadatafile = open("/var/cache/edb/remote_metadata.pickle", "w+")
            cPickle.dump(metadata,metadatafile)
            metadatafile.close()
        if makepickle:
            metadatafile = open(makepickle, "w")
            cPickle.dump(metadata[baseurl]["data"],metadatafile)
            metadatafile.close()
    except SystemExit, e:
        raise
    except Exception, e:
        sys.stderr.write("!!! Failed to write binary metadata to disk!\n")
        sys.stderr.write("!!! "+str(e)+"\n")

    if not keepconnection:
        conn.close()

    return metadata[baseurl]["data"]
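
# Usage sketch: dir_get_metadata() is the usual entry point when portage
# resolves against a remote binary-package host. The URL is a
# hypothetical PORTAGE_BINHOST-style value; "SLOT" is a typical key.
#
#   metadata = dir_get_metadata("http://mirror.example.org/packages/All")
#   for pkg in metadata.keys():
#       print pkg, metadata[pkg].get("SLOT")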
