```
import xlwings as xw
from selenium import webdriver
import time
import os
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import random
import datetime
# Chinese numerals for 0..19; numbers below 20 are looked up here directly,
# and single digits 1..9 are looked up here when composing larger numbers.
_MAPPING = (u'零', u'一', u'二', u'三', u'四', u'五', u'六', u'七', u'八', u'九', u'十', u'十一', u'十二', u'十三', u'十四', u'十五', u'十六', u'十七',u'十八', u'十九')
# Place-value suffix by digit position: ones (none), tens, hundreds, thousands.
_P0 = (u'', u'十', u'百', u'千',)
# Exclusive upper bound of the values _to_chinese4 accepts.
_S4 = 10 ** 4


def _to_chinese4(num):
    """Return the Chinese numeral text for ``num``, where 0 <= num < 10000.

    Values below 20 come straight from ``_MAPPING``. Larger values are split
    into decimal digits; every non-zero digit contributes its numeral plus
    its place suffix, and a single '零' is inserted after a run of zero
    digits that sits between non-zero digits (e.g. 101 -> '一百零一').

    Raises:
        AssertionError: if ``num`` is outside [0, 10000).
    """
    assert (0 <= num and num < _S4)
    if num < 20:
        return _MAPPING[num]

    # Decimal digits, least significant first. divmod keeps the arithmetic
    # integral; true division (``/``) produced fractional "digits" such as
    # 0.0999..., which broke the zero test below.
    digits = []
    while num >= 10:
        num, digit = divmod(num, 10)
        digits.append(digit)
    digits.append(num)
    count = len(digits)

    # The text is assembled back-to-front (suffix before numeral, low digits
    # first) and reversed once at the end. Every piece appended here is a
    # single character, so reversing the string is safe.
    result = u''
    for idx, val in enumerate(digits):
        val = int(val)  # tolerate integral floats such as 25.0
        if val != 0:
            result += _P0[idx] + _MAPPING[val]
            # A zero in the next-higher position needs one '零' placeholder.
            if idx < count - 1 and digits[idx + 1] == 0:
                result += u'零'
    return result[::-1]
def _title_tag(num):
    """Return the text of ``num`` before its first '/', for use in screenshot names."""
    return str(num).split('/', 1)[0]


def _shutdown_browser(driver):
    """Quit the WebDriver session, then force-kill leftover browser processes."""
    time.sleep(5)
    try:
        driver.quit()
    finally:
        # NOTE(review): ``taskkill /im`` ends every chrome.exe / chromedriver.exe
        # process on the machine, not only the one started here.
        os.system('taskkill /im chrome.exe /f')
        os.system('taskkill /im chromedriver.exe /f')
        time.sleep(5)


def listweb(num):
    """Search for one article in the browser and collect its record lines.

    Drives Chrome through: login, search for ``num``, add the first result to
    the marked list, open the printable export, and read the ``td`` texts of
    the third table on that page (the first cell is dropped). The cell
    containing '来源出版物' is parsed for a four-digit year after '出版年':

    * year before 2019: the 'Journal Citation Reports' page is opened, the
      impact-factor text for that year is appended together with a
      '年JCR分区:' heading, and two element screenshots (``head...png`` and
      ``body...png``) are saved to the working directory;
    * otherwise: the ``overlayJCRblock`` element is saved as ``2019...png``.

    Args:
        num: search text typed into the query box (the caller passes the
            article title). The part before the first '/' also becomes part
            of the screenshot file names.

    Returns:
        list: the record cell texts (``str``), followed by any appended
        strings and ``{'pic': <file name without '.png'>}`` markers.

    Raises:
        ValueError: if no cell containing '来源出版物' is found.
        Selenium exceptions / IndexError from the page interaction propagate
        unchanged; the browser is shut down in every case.
    """
    import re  # local import: ``re`` is not among the file's top-level imports

    driver = webdriver.Chrome('c:\\google\\chromedriver.exe')
    try:
        url = '#'  # NOTE(review): placeholder URL in this copy of the script
        driver.get(url)
        time.sleep(10)
        driver.find_element_by_class_name("link-login").click()
        time.sleep(10)
        # NOTE(review): ``nm`` and ``pw`` are not defined anywhere in the
        # visible file; they must exist at module level before this runs.
        driver.find_element_by_id("username").send_keys(nm)
        driver.find_element_by_id("password").send_keys(pw)
        driver.find_element_by_class_name("submitBtn").click()
        time.sleep(10)

        # Run the search and put the first hit on the marked list.
        driver.find_element_by_id("value(input1)").send_keys(num)
        driver.find_element_by_class_name("searchButton").click()
        time.sleep(5)
        driver.find_element_by_class_name("search-results-checkbox-align").click()
        driver.find_element_by_class_name("snowplow-marked-list").click()
        driver.find_element_by_css_selector("div.umlProdListItem >a.umlProdLink").click()

        # Tick the extra output fields, then request the printable view.
        for field_id in ("ADDRS_fields", "PMID_fields",
                         "AUTHORSIDENTIFIERS_fields", "PUBINFO_fields",
                         "exportTypeName"):
            driver.find_element_by_id(field_id).click()
        driver.find_element_by_class_name("quickOutputPrint").click()

        all_h = driver.window_handles
        WebDriverWait(driver, 600).until(lambda x: x.find_elements_by_tag_name("table"))
        driver.switch_to.window(all_h[0])
        driver.switch_to.window(all_h[1])

        # Record lines: every cell of the third table except the first one.
        record_table = driver.find_elements_by_tag_name("table")[2]
        list1 = [cell.text for cell in record_table.find_elements_by_tag_name('td')]
        list2 = list1[1:]

        # The last cell mentioning '来源出版物' carries the publication year.
        laiyuan = None
        for text in list2:
            if '来源出版物' in text:
                laiyuan = text
        if laiyuan is None:
            raise ValueError("no cell containing '来源出版物' in the exported record")
        a = laiyuan.split('出版年')
        b = re.findall(r'(\d{4})', a[1])
        nian = b[0]
        older = int(nian) < 2019
        if older:
            print('2018')

        # Both paths start from the journal overlay on the first window.
        driver.switch_to.window(all_h[0])
        driver.find_element_by_id("show_journal_overlay_link_1").click()
        time.sleep(5)
        numok = _title_tag(num)

        if older:
            print(nian)
            # Rows on the JCR page are addressed by distance from 2019.
            niancha = 2019 - int(nian)
            row_sel = "[row='" + str(niancha) + "']"
            driver.find_elements_by_partial_link_text('Journal Citation Reports')[1].click()
            all_h = driver.window_handles
            driver.switch_to.window(all_h[2])
            time.sleep(60)
            # Poll every 10 s until the footer marker shows up in the page.
            # NOTE(review): unbounded - this never returns if it never appears.
            while True:
                time.sleep(10)
                if "footer-line1" in driver.page_source:
                    break
            time.sleep(10)
            WebDriverWait(driver, 600).until(lambda x: x.find_element_by_css_selector("div.journal-profile-years-tabset"))
            driver.find_element_by_css_selector("div.journal-profile-page >div.journal-profile-years-tabset >div.tabset-head >div.tab-1").click()
            jif = driver.find_elements_by_css_selector(row_sel + ">td>div.journalImpactFactor")[1].text
            print(jif)
            list2.append(nian+'年JCR影响因子:'+jif)
            list2.append(nian+'年JCR分区:')

            # Force the second data tabset onto its third tab, then click it.
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            set2 = driver.find_elements_by_css_selector("div.journal-data-tabset")[1]
            driver.execute_script("arguments[0].setAttribute('class','tabset cur-tab-3 journal-data-tabset');",set2)
            time.sleep(1)
            menu = driver.find_element_by_css_selector("div.metric-trend-graph-totalCites >div.journal-data-tabset >div.tabset-head >div.tab-3")
            actions = ActionChains(driver)
            actions.move_to_element(menu)
            actions.click(menu)
            actions.perform()
            time.sleep(5)

            # Expand the rank panel and capture its header row and year row.
            panel = "div[help='jcrJournalProfileRank'][_component='72'] >div"
            table_sel = panel + " >div.component-body >div.data-table >div.data-panel >div.c"
            driver.find_element_by_css_selector(panel + " >div.component-title >div.expand-button").click()

            # Random suffixes keep file names from different articles apart.
            numa = random.randint(0,9999)
            headrank = "head"+numok+str(numa)
            driver.find_element_by_css_selector(table_sel + " >div.th").screenshot("./"+headrank+".png")
            list2.append({'pic':headrank})

            numb = random.randint(0,9999)
            bodyrank = "body"+numok+str(numb)
            driver.find_element_by_css_selector(table_sel + " >div.tb >table >tbody >tr" + row_sel).screenshot("./"+bodyrank+".png")
            list2.append({'pic':bodyrank})
            print(bodyrank)
        else:
            # 2019 or later: the overlay block itself is the evidence picture.
            numc = random.randint(0,9999)
            rank2019 = "2019"+numok+str(numc)
            driver.find_element_by_class_name("overlayJCRblock").screenshot("./"+rank2019+".png")
            list2.append({'pic':rank2019})
    finally:
        # Always release the browser, including when a lookup step fails.
        _shutdown_browser(driver)

    print(list2)
    return list2
# --- 1. Read the article titles from the input workbook ---------------------
wb = xw.Book(r'b2.xls')
sht = wb.sheets['Sheet1']
a1_c4_value = sht.range((1,1),sht.used_range.last_cell).options(ndim=2).value
# Every cell mentioning '英文题名' holds "<label>:<title>"; keep the title part.
names = []
for row_values in a1_c4_value:
    for cell_value in row_values:
        if '英文题名' in str(cell_value):
            names.append(cell_value.split(':',1)[1])
print(names)
len1 = len(names)

# --- 2. Look every title up in the browser -----------------------------------
# ``lunwen`` is a list of lists: a one-item heading list, then the record
# returned by listweb, for each title in turn.
lunwen = []
for i,k in enumerate(names):
    try:
        lw = listweb(k)
    except Exception:
        # Report which item failed, then re-raise the real exception (the
        # previous ``raise e`` named an undefined variable and hid it).
        print(str(i+1)+"Unexpected error:")
        raise
    else:
        # NOTE(review): earlier drafts built a per-item heading
        # ('第 i 条,共 n 条') that was immediately overwritten; only the
        # heading below ever reached the output, so it is the one kept.
        bti = '检索结果:以下文章被 Science Citation Index Expanded(SCIE) 收录,共'+_to_chinese4(len1)+' 条'
        lunwen.append([bti])
        lunwen.append(lw)
        print(str(i)+'ok')

# Footer block, tagged as a dict under 'pic' so the writer below can tell it
# apart from screenshot names (which are plain strings under 'pic').
t = datetime.datetime.now().strftime('%Y年%m月%d日')
lunwen3 =[{'pic': {'foot':['','',' 检索单位:',' ',' 委 托 人:',' 检索证明人:',' 检索日期:'+t]}}]
lunwen.append(lunwen3)

# --- 3. Fill the certificate template ----------------------------------------
filepath=r'检索模板d.xls'
app2=xw.App(visible=False,add_book=False)
try:
    wb2=app2.books.open(filepath)
    sht2 = wb2.sheets['SCI证明']

    bti = '收录,共'+_to_chinese4(len1)+'条'
    sht2.range('A4').value = bti
    # Raw Excel constants passed through COM.
    # NOTE(review): -4130 / -4152 look like xlJustify / xlRight - confirm.
    sht2.range('A4').api.VerticalAlignment = -4130
    sht2.range('A4').api.WrapText = True
    sht2.range('A4').api.Font.Size =15

    # Picture kind (first four characters of the file name) ->
    # (forced width or None, extra rows to skip for the entries that follow).
    pic_layout = {'head': (460, 2), 'body': (460, 1), '2019': (None, 13)}

    b=0  # running count of extra rows consumed by pictures so far
    c = [entry for group in lunwen for entry in group]
    for i,k in enumerate(c):
        # Target row = 4 + position + column count of the block at A1 + b.
        ncol = sht2.range('A1').expand().last_cell.column
        target = sht2.range('A'+str(4+i+ncol+b))

        if isinstance(k,str):
            target.value=k
            target.api.VerticalAlignment = -4130
            target.api.WrapText = True
            continue

        picpng = k['pic']
        if isinstance(picpng,dict):
            # Footer: written downwards from the target cell. It must not
            # fall through to the slicing below, which fails on a dict.
            target.options(transpose=True).value=picpng['foot']
            target.api.VerticalAlignment = -4152
            target.api.WrapText = True
            continue

        layout = pic_layout.get(picpng[:4])
        if layout is not None:
            width, extra_rows = layout
            pic = sht2.pictures.add(r'.\\'+str(picpng)+'.png',left=target.left,top=target.top)
            if width is not None:
                pic.width = width
            b = b+extra_rows

    # Random suffix so repeated runs do not overwrite each other (mostly).
    numc = random.randint(0,99)
    print(numc)
    wb2.save("检索证明模板"+str(numc)+".xls")
    wb2.close()
finally:
    # Never leave the hidden Excel instance running, even after an error.
    app2.quit()
```
- py转exe
- py服务器
- py登录
- py查询
- 爬TB热卖1017
- 爬TB分类1015
- 爬BD文库1010
- 爬文库924
- json917
- decode912
- 转码910
- radians829
- turtle827
- 画叶子820
- 树干815
- Turtle, mainloop718
- sorted716
- random711
- turtle.colormode709
- abs704
- begin_fill702
- randint,random627
- isalpha625
- keyword620
- compile618
- judge613
- list1.copy611
- random.choice606
- format_randint604
- save_csv528
- jieba523
- wordcloud521
- wordcloud516
- jieba.lcut514
- counts503
- format423
- 花瓣418
- strftime416
- turtle.fillcolor411
- random.random409
- pensize404
- 花蕊328
- input327
- eval_float320
- open319
- while314
- JCR影响因子313
- _to_chinese4_312
- encoding306
- read305
- flask1029