#!/usr/bin/env python
# -*- coding: cp1252 -*-
import sys
import string
import httplib
import time
import getopt


def elaborate(pattern, query, page):
    """Fill the request-path template ``pattern`` with a query and an offset.

    Every ``###QUERY###`` placeholder is replaced by ``query`` first, then
    every ``###PAGE###`` placeholder is replaced by ``page``. Both values are
    inserted verbatim (no URL quoting is applied here).

    Returns:
        The resulting string; ``pattern`` itself is not modified.
    """
    # Order matters: a '###PAGE###' token inside ``query`` is expanded too.
    return pattern.replace('###QUERY###', query).replace('###PAGE###', page)

def searchMe(pages,url):
    """Find the result positions at which ``url`` appears in each page.

    Every page is split on the literal result marker
    ``<p class=g><a class=l``; the n-th piece after the split (n >= 1) is the
    markup of the n-th result on that page. A result counts as a hit when
    ``url`` occurs anywhere inside its piece.

    Args:
        pages: sequence of HTML sources, one per downloaded result page, in
            download order.
        url: text to look for inside each result.

    Returns:
        A list of strings ``'Pagina:<p>, posizione:<n>'`` where ``<p>`` is the
        1-based page number and ``<n>`` the 1-based result position on that
        page. The list is empty when nothing matches.
    """
    marker = '<p class=g><a class=l'
    result = []
    for page_index, page in enumerate(pages):
        # Progress message keeps the 0-based page counter and the exact
        # spacing produced by the former multi-item print statement.
        print('Analizzo pagina  %d ...' % page_index)
        segments = page.split(marker)
        # segments[0] is whatever precedes the first result marker (page
        # header, search box, ...). It is not a result, so a match there must
        # not be reported as "posizione:0".
        for position, segment in enumerate(segments[1:], 1):
            if url in segment:
                result.append('Pagina:' + str(page_index + 1)
                              + ', posizione:' + str(position))
    return result

def output(res,fout):
    """Write the HTML report to ``fout`` and echo each hit on the console.

    The page heading is built from the module-level globals ``engine``,
    ``url``, ``query`` and ``numPage``, which must be set before this function
    is called. The values are inserted into the HTML as they are.

    Args:
        res: iterable of result strings; each becomes one ``<li>`` entry.
        fout: object with a ``write`` method that receives the HTML.
    """
    heading = "".join([
        "<html><head><title>Risultati ricerca</title></head><body><h1>Risultati ricerca su ",
        engine,
        "<br/>del sito <b>",
        url,
        "</b> con chiave <i>",
        query,
        "</i> in ",
        str(numPage),
        " pagine</h1><br><br><hr><br><br>",
    ])
    footer = '<h5>Dati prodotti da whereIs.py; Copyright <a href=\"http://www.antonellocicchese.com\" target="_blank">Antonello Cicchese</a></h5></body></html>'

    emit = fout.write
    emit(heading)
    emit('<ol>')
    for entry in res:
        emit('<li>'+entry+'</li>')
        # Mirror every report entry on standard output.
        print('Record '+url+' trovato -> '+entry)
    emit('</ol>')
    emit(footer)
    print('Analisi conclusa')
    
def _read_settings(fname):
    """Read the four run settings from the configuration file ``fname``.

    The first four lines must each have the form ``name=value``. Only the
    position of a line matters, not the name before the ``=``; the values
    are, in order: search-engine host, site URL to look for, search query and
    number of result pages to scan.

    Returns:
        A tuple ``(engine, url, query, numPage)`` with ``numPage`` as an int.

    Raises:
        IOError: if the file cannot be opened.
        IndexError: if the file has fewer than four lines or a line lacks
            an ``=``.
        ValueError: if the fourth value is not an integer.
    """
    with open(fname, 'r') as fin:
        lines = fin.readlines()
    # Split on the first '=' only, so values that themselves contain '='
    # (for instance a URL with a query string) are kept whole.
    values = [lines[idx].split('=', 1)[1].strip() for idx in range(4)]
    return values[0], values[1], values[2], int(values[3])


def _download_page(host, path):
    """GET ``path`` from ``host`` over HTTP.

    Returns:
        The response body, or None when the status code is not 200.

    Raises:
        httplib.HTTPException, IOError: on connection or protocol failures.
    """
    conn = httplib.HTTPConnection(host)
    try:
        conn.request("GET", path)
        response = conn.getresponse()
        # Check the numeric status: the reason phrase is free text chosen by
        # the server and is not guaranteed to be 'OK'.
        if response.status != 200:
            return None
        return response.read()
    finally:
        # Release the socket on success and on every failure path.
        conn.close()


def main():
    """Download the result pages, locate the site in them and write the report.

    Settings come from ``data.txt`` (see ``_read_settings``) and are published
    as the module globals ``engine``, ``url``, ``query`` and ``numPage``
    because ``output`` reads them from there. At most 100 pages are fetched,
    with a two-second pause after each one. Downloading stops at the first
    failed page; the pages fetched so far are still analysed. The report is
    written to ``risultato.htm``.
    """
    global engine
    global url
    global query
    global numPage

    pattern = "/search?q=###QUERY###&start=###PAGE###&sa=N"
    engine, url, query, numPage = _read_settings('data.txt')

    if numPage > 100:
        print("Una ricerca su piu' di 100 pagine non produce risultati attendibili")
        print("numero di pagine ridotto a 100")
        numPage = 100

    htmlSrc = []
    for i in range(numPage):
        # The 'start' offset advances by 10 for each page.
        ricerca = elaborate(pattern, query, str(i * 10))
        print('Connessione n° %d  URL: http://%s%s' % (i, engine, ricerca))
        try:
            body = _download_page(engine, ricerca)
        except (httplib.HTTPException, IOError):
            # Socket errors derive from IOError; anything else (including
            # Ctrl-C) is deliberately left to propagate.
            print('Impossibile connettersi')
            break
        if body is None:
            print("Errore download pagina")
            break
        print('Pagina  %d  scaricata' % i)
        htmlSrc.append(body)
        time.sleep(2)  # Seconds to pause between one query and the next

    found = searchMe(htmlSrc, url)

    # Open the report only once there is something to write, and make sure
    # it is closed even if writing fails.
    with open('risultato.htm', 'w') as fout:
        output(found, fout)

# Run the search only when this file is executed as a script, not on import.
if __name__=="__main__":
    main()
