返回信息流附件(4.1KB) __geturlinfo.py
各位大神在执行如上的python程序时为啥会报如图的错?错在哪?谢谢各位了
代码直接贴吧
# -*- coding: utf-8 -*-
import os
import glob
from imp import reload
import sys
import _thread
import threading
import time
inputdir='''indir'''
outputdir='''hugeoutput'''
logdir='''errlog'''
useragent='''\"User-Agent:Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X; en-us) AppleWebKit/534+ (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 MsnBot-Media /1.0b\"'''
class urlprocessor(threading.Thread):
def __init__(self,infile,outfile):
threading.Thread.__init__(self)
self.inputfile=infile
self.number=outfile
self.outputfile=outputdir+'''\\'''+outfile+".txt"
self.logfile=logdir+'''\\'''+outfile+".txt"
self.outstream=open(self.outputfile,'a',encoding= 'utf-8')
self.logstream=open(self.logfile,'a',encoding= 'utf-8')
self.cmd='''curl '''
self.servernotfound=0
self.http=0
self.location=0
self.redirected=0
self.normal=0
self.outputline=''''''
def run(self):
with open(self.inputfile,'r',encoding= 'utf-8') as f0:
f1=f0.readlines()
print(len(f1))
out = open("hugeoutput\\"+self.number+".txt",encoding= 'utf-8')
outcont=out.readlines()
outsize=len(outcont)
out.close()
err = open("errlog\\"+self.number+".txt",encoding= 'utf-8')
errcont=err.readlines()
outsize+=len(errcont)
err.close()
ff=f1[outsize:-1]
print(outsize)
time.sleep(10)
for i in ff:
tmp=i.split()
url=tmp[0]
self.cmd="curl -H "+useragent+" "+url+" -I"
self.outputline=url+' '
print("it work ok\n")
try:
results=os.popen(self.cmd).readlines()
except:
try:
self.logstream.write(url+'\n')
self.logstream.flush()
print("decode error\n")
except:
print("log write error!\n")
for item in results:
http=item.find('HTTP')
if http>=0:
self.http=1
self.outputline+=item[:-1]
break
if self.http==0:
self.outputline+=' '+"HTTP/servernotfound"
self.servernotfound+=1
else:
self.http=0
for item in results:
location=item.find('Location')
if location>=0:
self.redirected+=1
self.location=1
substr=item.split(':')
self.outputline+=' '+item
break
if self.location==0:
self.outputline+=' '+url+'\n'
self.normal+=1
else:
self.location=0
try:
self.outstream.write(self.outputline)
self.outstream.flush()
except Exception:
print("output err\n")
self.logstream.write('\n')
self.logstream.write("redirected:"+str(self.redirected)+" normal:"+\
str(self.normal)+" servernotfound:"+\
str(self.servernotfound))
self.outstream.close()
self.logstream.close()
print("be at the end\n")
def stop(self):
self.thread_stop=1
def caller():
files=glob.glob(inputdir+'\\*.txt')
threadlist=[]
for file in files:
filename=file.split("\\")[1]
filenum=filename.split(".")[0]
thread=urlprocessor(file,filenum)
thread.start()
threadlist.append(thread)
for threads in threadlist:
threads.stop()
print("A thread finished\n")
if __name__=='__main__':
caller()
这是一条镜像帖。来源:北邮人论坛 / python / #1637同步于 2014/7/13
该镜像源已超过 30 天没有更新,可能在源站已被删除。
Python机器人发帖
各位大神帮忙查查错
ppzhoujun
2014/7/13镜像同步4 回复
订阅后,新回复会通过你的通知中心匿名送达。
4 条回复
嗯,我试试,txt中是些url地址。谢谢
【 在 b78859793 的大作中提到: 】
: 你的input dir里都是些什么txt?open了不存在的文件?
: 路径处理建议用os.path
【 在 ppzhoujun 的大作中提到: 】
: 嗯,我试试,txt中是些url地址。谢谢
怀疑是open了存在的txt 看你的txt应该是数字.txt
很有可能就是哪里路径什么的出了问题 可以用绝对路径试试
问题查出来了url没加引号,唉
【 在 b78859793 的大作中提到: 】
: 怀疑是open了存在的txt 看你的txt应该是数字.txt
: 很有可能就是哪里路径什么的出了问题 可以用绝对路径试试