最近看了介紹股票籌碼的節目,提到可以透過基金持股來判斷籌碼動向,
基金的月報表會在每個月的第10個工作天公布上個月的前10大持股,
所以用 Python 抓取基金公會網站的資料,比較最新兩個月的持股變化
使用
Python3
mechanicalsoup
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy | |
import mechanicalsoup | |
import pandas as pd | |
# Accumulator for every downloaded holding row, indexed by (Date, Fund).
all_df = pd.DataFrame(
    [],
    columns=['Fund', 'Date', 'StockNo', 'StockName', 'Money'],
).set_index(['Date', 'Fund'])

# One stateful browser session is shared by all the download helpers so the
# ASP.NET form state carries over from request to request.
browser = mechanicalsoup.StatefulBrowser(
    user_agent='Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.3; Win64; x64)',
    raise_on_404=True,
    soup_config={'from_encoding': 'utf-8', 'features': 'lxml'},
)

# Load the query page once; the helpers read its form and month selector.
req = browser.open("https://www.sitca.org.tw/ROC/Industry/IN2629.aspx")
def dl_month():
    """Return the values of the last two entries of the month selector.

    Reads the page currently loaded in the module-level ``browser`` and
    returns ``[second_to_last_value, last_value]`` taken from the
    ``ddlQ_YM`` drop-down.  The rest of the script treats the first element
    as the previous month and the second as the latest month; this assumes
    the site lists months in ascending order -- TODO confirm.
    """
    page = browser.get_current_page()
    month_select = page.find("select", {"name": "ctl00$ContentPlaceHolder1$ddlQ_YM"})
    month_options = month_select.find_all('option')
    second_to_last, last = month_options[-2], month_options[-1]
    return [second_to_last["value"], last["value"]]
# Layout of the flattened <td> list of the result table, as relied on by the
# parser below.
# NOTE(review): these offsets come from the page markup at the time the
# script was written -- confirm against the current site before trusting them.
_HEADER_CELLS = 10      # cells skipped before the first fund
_ROW_CELLS = 9          # cells consumed by one holding row
_TOTAL_ROW_CELLS = 2    # cells consumed by a "合計" (total) row
_LISTED_MARKETS = ("國內上市", "國內上櫃")


def _parse_holdings(cells):
    """Turn the flat list of table cells into a list of holding dicts.

    The cells are walked with a cursor: a cell whose text is "合計" ends the
    current fund; the first cell seen while no fund is active is taken as the
    fund name; every other position is read as a holding row whose market,
    stock number, stock name and amount sit at offsets 1 to 4.  Only rows
    whose market is one of ``_LISTED_MARKETS`` are kept.
    """
    rows = []
    fund_name = ""
    idx = _HEADER_CELLS
    cell_count = len(cells)
    while idx < cell_count:
        if cells[idx].text == "合計":
            # End of this fund's section; the next cell starts a new fund.
            fund_name = ""
            idx += _TOTAL_ROW_CELLS
            continue
        if fund_name == "":
            fund_name = cells[idx].text.strip()
            idx += 1
            continue
        if cells[idx + 1].text.strip() in _LISTED_MARKETS:
            rows.append({
                'Fund': fund_name,
                'StockNo': cells[idx + 2].text,
                'StockName': cells[idx + 3].text,
                # Amounts are rendered with thousands separators.
                'Money': numpy.int64(cells[idx + 4].text.replace(',', '')),
            })
        idx += _ROW_CELLS
    return rows


def dl_page(month, calss_type):
    """Query one month / fund class and return its listed-stock holdings.

    Submits the query form of the page loaded in the module-level ``browser``
    and parses the fourth ``<table>`` of the response.

    Args:
        month: Value for the ``ddlQ_YM`` month selector (see ``dl_month``).
        calss_type: Value for the ``ddlQ_Class`` fund-class selector, e.g.
            ``'AA1'``.  (The parameter name keeps its original spelling so
            existing callers are unaffected.)

    Returns:
        A DataFrame indexed by ``(Date, Fund)`` with the columns
        ``StockNo``, ``StockName`` and ``Money``; empty if no row matched.
    """
    form = browser.select_form('#aspnetForm')
    # Query by fund class: mark the company selectors and the second class
    # selector as disabled and re-enable the class selector (presumably so
    # that only the class selector is submitted -- TODO confirm).
    form.form.find("select", {"name": "ctl00$ContentPlaceHolder1$ddlQ_Comid"})["disabled"] = "disabled"
    del form.form.find("select", {"name": "ctl00$ContentPlaceHolder1$ddlQ_Class"})["disabled"]
    form.form.find("select", {"name": "ctl00$ContentPlaceHolder1$ddlQ_Comid1"})["disabled"] = "disabled"
    form.form.find("select", {"name": "ctl00$ContentPlaceHolder1$ddlQ_Class1"})["disabled"] = "disabled"
    browser["ctl00$ContentPlaceHolder1$ddlQ_YM"] = month
    browser["ctl00$ContentPlaceHolder1$rdo1"] = "rbClass"
    browser["ctl00$ContentPlaceHolder1$ddlQ_Class"] = calss_type
    form.choose_submit("ctl00$ContentPlaceHolder1$BtnQuery")
    browser.submit_selected()

    cells = browser.get_current_page().select('table')[3].find_all('td')
    # Build the frame once from all rows: DataFrame.append was removed in
    # pandas 2.0 and copied the whole frame on every call.
    df = pd.DataFrame(
        _parse_holdings(cells),
        columns=['Fund', 'StockNo', 'StockName', 'Money'],
    )
    df['Date'] = month
    df.set_index(['Date', 'Fund'], inplace=True)
    return df
months = dl_month()
for m in months:
    # Throw-away request: per the original author's note the first query
    # seems not to take effect (cause unknown), so its result is discarded.
    dl_page(m, 'AA1')
    # Download the four fund classes in order and append them to the
    # accumulated frame.
    df1, df2, df3, df4 = (
        dl_page(m, class_code) for class_code in ('AA1', 'AB1', 'AI1', 'AH1')
    )
    all_df = pd.concat([all_df, df1, df2, df3, df4], sort=True)
# Month-over-month changes per fund: for every fund, list the stocks that
# dropped out of, entered, or stayed in its reported holdings between
# months[0] (earlier) and months[1] (later).
print("月別基金變動")
index_fund = all_df.index.unique(level='Fund')
for fund in index_fund:
    df = all_df[all_df.index.get_level_values('Fund') == fund]
    fund_dates = df.index.get_level_values('Date')
    prev_m = df[fund_dates == months[0]]
    last_m = df[fund_dates == months[1]]
    prev_stock = prev_m['StockNo'].tolist()
    last_stock = last_m['StockNo'].tolist()

    # Stock number -> name, keeping the first name seen for each number.
    # Built once per fund instead of filtering the frame per stock, and
    # without the positional Series[0] lookup that pandas has deprecated
    # for non-integer indexes.
    stock_names = {}
    for stock_no, stock_name in zip(df['StockNo'], df['StockName']):
        stock_names.setdefault(stock_no, stock_name)

    # Holdings present in both months.
    intersection = set(prev_stock) & set(last_stock)

    print(fund + " 移出前十持股")
    for s in prev_stock:
        if s not in intersection:
            print("[" + str(s) + "] " + stock_names[s])

    print(fund + " 新增前十持股")
    for s in last_stock:
        if s not in intersection:
            print("[" + str(s) + "] " + stock_names[s])

    print(fund + " 保留前十持股")
    # Sorted so the output order is stable between runs.
    for s in sorted(intersection):
        print("[" + str(s) + "] " + stock_names[s])
# Per-stock change across all funds: compare the summed amount held in
# months[0] (earlier) with the summed amount held in months[1] (later).
print("個股增減變動")
index_stock = sorted(set(all_df['StockNo'].tolist()))
all_dates = all_df.index.get_level_values('Date')
prev_m = all_df[all_dates == months[0]]
last_m = all_df[all_dates == months[1]]

# Stock number -> name, keeping the first name seen for each number; avoids
# the positional Series[0] lookup that pandas has deprecated for non-integer
# indexes.
stock_names = {}
for stock_no, stock_name in zip(all_df['StockNo'], all_df['StockName']):
    stock_names.setdefault(stock_no, stock_name)

for s in index_stock:
    stockname = stock_names[s]
    # The sum of an empty selection is 0, so a stock missing from a month
    # simply contributes 0 for that month.
    prev_money = prev_m[prev_m['StockNo'] == s].Money.sum()
    last_money = last_m[last_m['StockNo'] == s].Money.sum()
    label = "[" + str(s) + "] " + stockname
    if prev_money <= 0:
        print(label + " 新增十大持股")
    elif last_money <= 0:
        print(label + " 移出十大持股")
    elif last_money > prev_money:
        print(label + " 十大持股增加")
    elif last_money < prev_money:
        print(label + " 十大持股減少")
    else:
        # Equal totals used to be reported as a decrease.
        print(label + " 十大持股不變")
沒有留言:
張貼留言