A 70B model and an 8B model were used for classification tasks on 3,200 cells, classifying 1,050 cells into 59 sectors. The data comes from the Forbes Billionaires list. Since billionaires are known to achieve better returns, this tool uses 30 years of data to validate the evolution of billionaires and sectors. The goal is to be better informed and to have a better overview of the financial market.
The code implements a handshake between Ollama and Excel, enabling local AI on Excel using my M1 Max (64 GB, 24-core GPU, 10-core CPU), so the AI runs locally against Excel data without consuming AI credits.
Export ALL data for: S&P 500, Nasdaq 100, Stoxx 200, Stoxx 50, Brazil 57, CAC 40 (France), DAX 40 (Germany), Spain 25, Taiwan 30, Mexico 35, Korea 80, FTSE 100 (UK), Japan 180, Hong Kong 400, Indonesia 40, Portugal 20 — each company, 50 years of data at daily level.
This generates a CSV file of 1500 × 50 × 365 cells, i.e. 27,375,000 CSV cells, of which around 14 million are filled.
Export ALL data for: S&P 500, Nasdaq 100, Stoxx 200, Stoxx 50, Brazil 57, CAC 40 (France), DAX 40 (Germany), Spain 25, Taiwan 30, Mexico 35, Korea 80, FTSE 100 (UK), Japan 180, Hong Kong 400, Indonesia 40, Portugal 20 — each company, 50 years of data at daily level.
This generates a CSV file of 1500 × 50 × 365 cells, i.e. 27,375,000 CSV cells, of which around 14 million are filled.
import pandas as pd
import yfinance as yf
import ssl
import urllib.request
# --- BYPASS SSL CERTIFICATE ISSUES ---
ssl._create_default_https_context = ssl._create_unverified_context
def get_south_korea_tickers():
    """Return the hand-maintained MSCI South Korea constituent tickers (~87 names).

    Every symbol carries the .KS suffix so Yahoo Finance resolves it on the
    KOSPI main board. Covers the index heavyweights plus mid-caps.
    """
    csv_codes = (
        "005930.KS, 000660.KS, 373220.KS, 207940.KS, 005380.KS, 068270.KS, 005490.KS, 051910.KS, "
        "105560.KS, 000270.KS, 035420.KS, 006400.KS, 035720.KS, 003550.KS, 012330.KS, 032830.KS, "
        "055550.KS, 066570.KS, 096770.KS, 000810.KS, 033780.KS, 086790.KS, 009150.KS, 010140.KS, "
        "017670.KS, 015760.KS, 003670.KS, 329180.KS, 018260.KS, 316140.KS, 011780.KS, 024110.KS, "
        "034730.KS, 009540.KS, 010950.KS, 259960.KS, 086280.KS, 034220.KS, 011170.KS, 004020.KS, "
        "005940.KS, 000100.KS, 028260.KS, 047050.KS, 001040.KS, 036570.KS, 009830.KS, 051900.KS, "
        "023530.KS, 030200.KS, 011200.KS, 005830.KS, 161390.KS, 267260.KS, 000720.KS, 090430.KS, "
        "006800.KS, 047810.KS, 010130.KS, 012450.KS, 302440.KS, 003490.KS, 138040.KS, 010120.KS, "
        "028300.KS, 196170.KS, 323410.KS, 271560.KS, 064350.KS, 032640.KS, 402340.KS, 034020.KS, "
        "003230.KS, 326030.KS, 180640.KS, 443060.KS, 005935.KS, 005385.KS, 0126Z0.KS, 008770.KS, "
        "042660.KS, 000670.KS, 047040.KS, 000080.KS, 000150.KS, 001450.KS, 004370.KS"
    )
    # Split on commas, then strip the surrounding whitespace left by the
    # multi-line string literal.
    return list(map(str.strip, csv_codes.split(",")))
# --- PHASE 1: GET TICKERS & VALIDATE MARKET CAPS ---
# Query Yahoo Finance once per symbol and keep only names with a positive
# market cap, so Phase 2 can download them ordered by size.
tickers = get_south_korea_tickers()
print(f"Phase 1: Validating {len(tickers)} South Korea Tickers and fetching Market Caps...")
cap_list = []
for t in tickers:
    try:
        stock = yf.Ticker(t)
        # KRW is the currency for South Korean stocks
        m_cap = stock.info.get('marketCap', 0)
        currency = stock.info.get('currency', 'KRW')
        if m_cap > 0:
            cap_list.append({'Ticker': t, 'MarketCap': m_cap, 'Currency': currency})
            print(f"Success: {t} | Cap: {m_cap:,} {currency}")
        else:
            # Symbol resolved but reported no cap; it is dropped from the export.
            print(f"Verified (No Cap): {t}")
    except Exception:
        # Network/lookup failure for this one symbol; skip it and continue.
        print(f"FAILED fetch for: {t}")
# Sort by Market Cap
sorted_df = pd.DataFrame(cap_list)
if not sorted_df.empty:
    sorted_df = sorted_df.sort_values(by='MarketCap', ascending=False)
    ordered_tickers = sorted_df[sorted_df['MarketCap'] > 0]['Ticker'].tolist()
else:
    # Nothing validated — abort before attempting any download.
    print("No valid data found.")
    exit()
# --- PHASE 2: DOWNLOAD HISTORICAL DATA ---
print(f"\nPhase 2: Downloading Daily Data for {len(ordered_tickers)} stocks...")
start = "1976-01-01"
end = "2026-03-21"
data = yf.download(ordered_tickers, start=start, end=end, auto_adjust=True)
# Format for export (identical to your Taiwan/Portugal logic)
# With multiple tickers yf.download returns MultiIndex columns (field, ticker).
if isinstance(data.columns, pd.MultiIndex):
    close_data = data['Close']
else:
    close_data = data
# Keep only tickers that actually returned a price column, in cap order.
valid_tickers = [t for t in ordered_tickers if t in close_data.columns]
close_data = close_data[valid_tickers]
# --- PHASE 3: EXPORT ---
# Wide format: rows = tickers, columns = ISO-formatted dates.
df_wide = close_data.transpose()
df_wide.columns = pd.to_datetime(df_wide.columns).strftime('%Y-%m-%d')
file_name = f"SouthKorea_MSCI_MarketCap_Sorted_{len(valid_tickers)}.csv"
df_wide.to_csv(file_name)
print("-" * 30)
print(f"Done! Exported {len(valid_tickers)} stocks to {file_name}")
Japan Export — MSCI Japan (~180 stocks)
import pandas as pd
import yfinance as yf
import ssl
import numpy as np
# --- BYPASS SSL CERTIFICATE ISSUES ---
ssl._create_default_https_context = ssl._create_unverified_context
def get_japan_tickers_full():
    """Return the hand-maintained MSCI Japan constituent tickers.

    Every symbol carries the .T suffix so Yahoo Finance resolves it on the
    Tokyo Stock Exchange.
    """
    csv_codes = (
        "7203.T, 8306.T, 6501.T, 6758.T, 8316.T, 6857.T, 8035.T, 8411.T, 8058.T, 7011.T, "
        "6861.T, 9984.T, 8031.T, 8001.T, 4063.T, 9983.T, 8766.T, 4502.T, 7974.T, 6098.T, "
        "6902.T, 4503.T, 6702.T, 6301.T, 7267.T, 6503.T, 8053.T, 4519.T, 6981.T, 6367.T, "
        "9432.T, 9433.T, 2914.T, 6146.T, 6273.T, 5108.T, 8801.T, 8802.T, 6723.T, 7741.T, "
        "4901.T, 4661.T, 6954.T, 1925.T, 1928.T, 9101.T, 9104.T, 9107.T, 8002.T, 8015.T, "
        "7733.T, 4568.T, 9020.T, 9021.T, 9022.T, 4689.T, 7261.T, 7201.T, 3407.T, 3402.T, "
        "5401.T, 5411.T, 6504.T, 6506.T, 6762.T, 6869.T, 7751.T, 7752.T, 8037.T, 8267.T, "
        "8308.T, 8309.T, 8601.T, 8604.T, 8630.T, 8725.T, 8750.T, 9201.T, 9202.T, 9501.T, "
        "9502.T, 9503.T, 9613.T, 9735.T, 9843.T, 1605.T, 1801.T, 1802.T, 1803.T, 1812.T, "
        "2267.T, 2502.T, 2503.T, 2801.T, 2802.T, 3088.T, 3092.T, 3289.T, 3382.T, 4021.T, "
        "4183.T, 4188.T, 4324.T, 4452.T, 4507.T, 4523.T, 4543.T, 4578.T, 4612.T, 4704.T, "
        "4732.T, 4768.T, 4911.T, 5020.T, 5201.T, 5332.T, 5713.T, 5802.T, 5901.T, 6113.T, "
        "6326.T, 6471.T, 6479.T, 6586.T, 6645.T, 6701.T, 6752.T, 6920.T, 6971.T, 7012.T, "
        "7013.T, 7269.T, 7270.T, 7272.T, 7309.T, 7532.T, 7832.T, 7911.T, 7912.T, 8113.T, "
        "8227.T, 8233.T, 8252.T, 8331.T, 8354.T, 8355.T, 8410.T, 8473.T, 8591.T, 8593.T, "
        "8830.T, 9001.T, 9005.T, 9007.T, 9008.T, 9009.T, 9041.T, 9042.T, 9064.T, 9301.T, "
        "9508.T, 9531.T, 9532.T, 9602.T, 9684.T, 9697.T, 9766.T, 9989.T, 2269.T, 2413.T, "
        "3436.T, 3659.T, 4151.T, 4528.T, 4921.T, 6724.T, 6976.T, 7182.T, 7186.T, 9434.T"
    )
    # Split on commas, stripping the whitespace introduced by the multi-line
    # string literal. NOTE(review): the literal holds 180 codes, not the 181
    # the old comment claimed.
    return list(map(str.strip, csv_codes.split(",")))
# --- PHASE 1: GET TICKERS & VALIDATE MARKET CAPS ---
print("Phase 1: Validating 181 Japan Tickers and fetching Market Caps...")
tickers = get_japan_tickers_full()
cap_list = []
# Note: This list is long, so we print progress
total = len(tickers)
for i, t in enumerate(tickers, 1):
    try:
        stock = yf.Ticker(t)
        m_cap = stock.info.get('marketCap', 0)
        currency = stock.info.get('currency', 'JPY')
        cap_list.append({'Ticker': t, 'MarketCap': m_cap, 'Currency': currency})
        # Progress heartbeat every 20 symbols (one info call per symbol is slow).
        if i % 20 == 0:
            print(f"Progress: {i}/{total} verified...")
    except Exception:
        # Keep a zero-cap placeholder so every input ticker stays accounted for;
        # zero-cap rows are filtered out below.
        cap_list.append({'Ticker': t, 'MarketCap': 0, 'Currency': 'N/A'})
# Sort by Market Cap
sorted_df = pd.DataFrame(cap_list)
if not sorted_df.empty and 'MarketCap' in sorted_df.columns:
    sorted_df = sorted_df.sort_values(by='MarketCap', ascending=False)
    ordered_tickers = sorted_df[sorted_df['MarketCap'] > 0]['Ticker'].tolist()
else:
    print("No valid MarketCap data found.")
    exit()
# --- PHASE 2: DOWNLOAD HISTORICAL DATA ---
print(f"\nPhase 2: Downloading Daily Data for {len(ordered_tickers)} stocks...")
start = "1976-01-01"
end = "2026-03-21"
# Bulk download historical prices
data = yf.download(ordered_tickers, start=start, end=end, auto_adjust=True)
# With multiple tickers yf.download returns MultiIndex columns (field, ticker).
if isinstance(data.columns, pd.MultiIndex):
    close_data = data['Close']
else:
    close_data = data
# Filter only tickers that actually downloaded data and reorder by Market Cap
valid_tickers = [t for t in ordered_tickers if t in close_data.columns]
close_data = close_data[valid_tickers]
# --- PHASE 3: EXPORT ---
# Wide format: rows = tickers, columns = ISO-formatted dates.
df_wide = close_data.transpose()
df_wide.columns = pd.to_datetime(df_wide.columns).strftime('%Y-%m-%d')
file_name = f"Japan_MSCI_MarketCap_Sorted_{len(valid_tickers)}.csv"
df_wide.to_csv(file_name)
print("-" * 30)
print(f"Done! Exported {len(valid_tickers)} stocks to {file_name}")
^GDAXI: DAX Performance Index
import pandas as pd
import yfinance as yf
import os
# 1. Added .DE to tickers to point to the German XETRA exchange.
# NOTE(review): this hard-coded list contains stale DAX names (e.g. FME, G1A);
# the valid-ticker filter below keeps the export from crashing on them.
raw_tickers = ["SAP","SIE","ALV","DTE","ENR","RHM","MUV2","IFX","DBK","DB1","EOAN","DHL","BAS","MBG","BAYN","RWE","ADS","HEI","CBK","BMW","VNA","FRE","VOW3","DTG","MTX","HNR1","MRK","SHL","HEN3","SY1","G1A","FME","QIA","BEI","KBX","HEN","CON","TLX","HOT","SRT3","BNR","G24","PAH3","LEG","LHA","ZAL","HAG","BMW3","P911","EVD","NEM","EVK","RAA","DHER"]
tickers = [t + ".DE" for t in raw_tickers]
print("Phase 1: Ranking Tickers by Market Cap Weight...")
cap_list = []
# Fetching info for German tickers (one API call per symbol).
for t in tickers:
    try:
        ticker_obj = yf.Ticker(t)
        # Some German stocks report marketCap in EUR; yfinance handles conversion generally
        m_cap = ticker_obj.info.get('marketCap', 0)
        cap_list.append({'Ticker': t, 'MarketCap': m_cap})
        print(f"Fetched: {t}")
    except Exception as e:
        # Keep a zero-cap placeholder so the symbol still appears (at the bottom).
        print(f"Could not fetch {t}: {e}")
        cap_list.append({'Ticker': t, 'MarketCap': 0})
# Sort list by MarketCap descending
sorted_df = pd.DataFrame(cap_list).sort_values(by='MarketCap', ascending=False)
ordered_tickers = sorted_df['Ticker'].tolist()
print(f"\nSorting complete. Top stock: {ordered_tickers[0]}. Bottom: {ordered_tickers[-1]}.")
# Phase 2: Download Daily Data
start = "1976-01-01"
end = "2026-02-21"
print(f"\nPhase 2: Downloading data for {len(ordered_tickers)} tickers...")
# Download data (this will now find the .DE symbols)
data = yf.download(ordered_tickers, start=start, end=end, auto_adjust=True)
# FIX: handle both MultiIndex (multi-ticker) and flat columns, matching the
# other country exporters, instead of unconditionally indexing ["Close"].
if isinstance(data.columns, pd.MultiIndex):
    close_data = data['Close']
else:
    close_data = data
# FIX: select only tickers that actually returned data. Indexing with the full
# ordered list raises KeyError whenever a symbol failed to download.
valid_tickers = [t for t in ordered_tickers if t in close_data.columns]
close_data = close_data[valid_tickers]
# Transpose: rows = tickers, columns = ISO-formatted dates.
df_wide = close_data.transpose()
df_wide.columns = pd.to_datetime(df_wide.columns).strftime('%Y-%m-%d')
# Save to CSV
file_name = "MSCI_GERMANY_WEIGHTED_DAILY.csv"
df_wide.to_csv(file_name)
print("-" * 30)
print(f"Success! File saved at: {os.getcwd()}/{file_name}")
import pandas as pd
import yfinance as yf
import ssl
import urllib.request
# --- BYPASS SSL CERTIFICATE ISSUES ---
ssl._create_default_https_context = ssl._create_unverified_context
def get_cac40_tickers():
    """Scrape the CAC 40 constituent tickers from Wikipedia.

    Most symbols already carry their Euronext Paris suffix (.PA) on the page;
    exceptions such as ArcelorMittal (MT.AS) come through the scrape as-is.
    Returns an empty list when the page cannot be fetched or parsed.
    """
    url = "https://en.wikipedia.org/wiki/CAC_40"
    request = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    try:
        with urllib.request.urlopen(request) as response:
            tables = pd.read_html(response)
        # The table position shifts over time, so locate the constituents
        # table by its 'Ticker' column rather than by a fixed index.
        constituents = next((tbl for tbl in tables if 'Ticker' in tbl.columns), None)
        if constituents is None:
            print("Could not find the Ticker column in any table.")
            return []
        return constituents['Ticker'].astype(str).tolist()
    except Exception as e:
        print(f"Error fetching CAC 40 list: {e}")
        return []
# --- PHASE 1: GET TICKERS & VALIDATE MARKET CAPS ---
tickers = get_cac40_tickers()
if not tickers:
    # Scrape failed entirely — nothing to export.
    print("CRITICAL: No tickers were retrieved.")
    exit()
print(f"Phase 1: Validating {len(tickers)} CAC 40 Tickers and fetching Market Caps...")
cap_list = []
for t in tickers:
    try:
        stock = yf.Ticker(t)
        info = stock.info
        m_cap = info.get('marketCap', 0)
        # Currency is usually EUR for CAC 40
        currency = info.get('currency', 'EUR')
        cap_list.append({'Ticker': t, 'MarketCap': m_cap, 'Currency': currency})
        print(f"Success: {t} | Cap: {m_cap:,} {currency}")
    except Exception:
        # Keep a zero-cap placeholder; zero-cap rows are filtered out below.
        print(f"FAILED info fetch for: {t}")
        cap_list.append({'Ticker': t, 'MarketCap': 0, 'Currency': 'N/A'})
# Sort by Market Cap
sorted_df = pd.DataFrame(cap_list)
if not sorted_df.empty and 'MarketCap' in sorted_df.columns:
    sorted_df = sorted_df.sort_values(by='MarketCap', ascending=False)
    ordered_tickers = sorted_df[sorted_df['MarketCap'] > 0]['Ticker'].tolist()
else:
    print("No valid MarketCap data found.")
    exit()
# --- PHASE 2: DOWNLOAD HISTORICAL DATA ---
# Using your requested end date of 2026-03-19
print(f"\nPhase 2: Downloading Daily Data for {len(ordered_tickers)} stocks...")
start = "1976-01-01"
end = "2026-03-19"
data = yf.download(ordered_tickers, start=start, end=end, auto_adjust=True)
# With multiple tickers yf.download returns MultiIndex columns (field, ticker).
if isinstance(data.columns, pd.MultiIndex):
    close_data = data['Close']
else:
    close_data = data
# Ensure only valid downloaded tickers are included
valid_tickers = [t for t in ordered_tickers if t in close_data.columns]
close_data = close_data[valid_tickers]
# --- PHASE 3: EXPORT ---
# Wide format: rows = tickers, columns = ISO-formatted dates.
df_wide = close_data.transpose()
df_wide.columns = pd.to_datetime(df_wide.columns).strftime('%Y-%m-%d')
file_name = "CAC40_MarketCap_Sorted.csv"
df_wide.to_csv(file_name)
print("-" * 30)
print(f"Done! Exported {len(valid_tickers)} stocks to {file_name}")
import pandas as pd
import yfinance as yf
import ssl
import numpy as np
# --- BYPASS SSL CERTIFICATE ISSUES ---
ssl._create_default_https_context = ssl._create_unverified_context
def get_brazil_tickers_full():
    """Return the hand-maintained Ibovespa (B3) constituent tickers.

    A complete static list is used so the script never works from a partial
    universe. Every symbol carries the .SA suffix for Yahoo Finance.
    """
    components = (
        "VALE3.SA", "PETR4.SA", "ITUB4.SA", "PETR3.SA", "BBDC4.SA", "ABEV3.SA",
        "WEGE3.SA", "BBAS3.SA", "ITSA4.SA", "B3SA3.SA", "EQTL3.SA", "RENT3.SA",
        "PRIO3.SA", "RDOR3.SA", "SUZB3.SA", "VIVT3.SA", "SBSP3.SA", "LREN3.SA",
        "GGBR4.SA", "TIMS3.SA", "RADL3.SA", "VBBR3.SA", "UGPA3.SA", "CMIG4.SA",
        "RAIL3.SA", "CPLE6.SA", "CSAN3.SA", "HAPV3.SA", "ELET3.SA", "ELET6.SA",
        "CCRO3.SA", "CPFE3.SA", "CSNA3.SA", "EMBR3.SA", "KLBN11.SA", "ENGI11.SA",
        "EGIE3.SA", "ASAI3.SA", "TOTS3.SA", "ALOS3.SA", "CYRE3.SA", "MULT3.SA",
        "SOMA3.SA", "CRFB3.SA", "MRFG3.SA", "BRFS3.SA", "GOAU4.SA", "COGN3.SA",
        "AZUL4.SA", "CVCB3.SA", "EZTC3.SA", "MRVE3.SA", "BEEF3.SA", "SMTO3.SA",
        "YDUQ3.SA", "PETZ3.SA", "BHIA3.SA", "DXCO3.SA", "MGLU3.SA", "FLRY3.SA",
        "ALPA4.SA", "ARZZ3.SA", "RAIZ4.SA", "RECV3.SA", "SLCE3.SA", "VIVA3.SA",
        "TRPL4.SA", "POMO4.SA", "SMLS3.SA", "BPAC11.SA", "STBP3.SA", "BRAP4.SA",
    )
    # Hand back a fresh list so callers can mutate it safely.
    return list(components)
# --- PHASE 1: GET TICKERS & VALIDATE MARKET CAPS ---
print("Phase 1: Validating Full Brazil Ticker List (80+ Stocks)...")
tickers = get_brazil_tickers_full()
cap_data = []
for t in tickers:
    try:
        stock = yf.Ticker(t)
        mcap = stock.info.get('marketCap', 0)
        if mcap > 0:
            cap_data.append({'Ticker': t, 'MarketCap': mcap})
            print(f"Added: {t} | {mcap:,.0f} BRL")
        else:
            # Some tickers (e.g. ELET3) occasionally return a blank info cap;
            # fall back to price x shares outstanding.
            print(f"No Cap data for {t}, checking alternative info...")
            hist = stock.history(period="1d")
            # FIX: guard against an empty history frame — .iloc[-1] on an
            # empty Series raised IndexError and silently dropped the ticker.
            if not hist.empty:
                mcap = hist['Close'].iloc[-1] * stock.info.get('sharesOutstanding', 0)
                if mcap > 0:
                    cap_data.append({'Ticker': t, 'MarketCap': mcap})
    except Exception:
        # FIX: narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit); best-effort skip is preserved.
        continue
if not cap_data:
    print("CRITICAL: No Market Cap data found.")
    exit()
# Sort by Market Cap (largest first) to approximate index weighting.
df_caps = pd.DataFrame(cap_data).sort_values('MarketCap', ascending=False)
ordered_tickers = df_caps['Ticker'].tolist()
# --- PHASE 2: DOWNLOAD DATA ---
print(f"\nPhase 2: Downloading Daily Data for {len(ordered_tickers)} stocks...")
data = yf.download(ordered_tickers, start="1976-01-01", end="2026-03-19", auto_adjust=True)
# With multiple tickers yf.download returns MultiIndex columns (field, ticker).
if isinstance(data.columns, pd.MultiIndex):
    close_data = data['Close']
else:
    close_data = data
# Clean data: only keep tickers that actually have prices
valid_final_tickers = [t for t in ordered_tickers if t in close_data.columns]
close_data = close_data[valid_final_tickers]
# --- PHASE 3: EXPORT ---
# Transpose so rows = Tickers, columns = Dates
df_wide = close_data.transpose()
df_wide.columns = pd.to_datetime(df_wide.columns).strftime('%Y-%m-%d')
file_name = f"Brazil_MarketCap_Sorted_FULL_OVERVIEW_{len(valid_final_tickers)}.csv"
df_wide.to_csv(file_name)
print("-" * 30)
print(f"Success! Exported {len(valid_final_tickers)} stocks to {file_name}")
import pandas as pd
import yfinance as yf
import ssl
import urllib.request
import numpy as np
# --- BYPASS SSL CERTIFICATE ISSUES ---
ssl._create_default_https_context = ssl._create_unverified_context
def apply_25_50_capping(df_caps):
    """
    Apply the MSCI 25/50 concentration rule to a DataFrame that has a
    'MarketCap' column, adding a 'Weight' column.

    1. No single stock > 25% (targets a 22.5% buffer below the limit).
    2. Sum of all stocks > 5% must be <= 50% (targets a 45% buffer).

    Returns the DataFrame sorted by descending weight, with weights adjusted
    iteratively until both hard limits hold (or 100 passes elapse).
    """
    total_mcap = df_caps['MarketCap'].sum()
    df_caps['Weight'] = df_caps['MarketCap'] / total_mcap
    df_caps = df_caps.sort_values('Weight', ascending=False).reset_index(drop=True)
    # Iterate: redistributing clipped weight can push other names back over a
    # threshold, so repeat until the exit check passes. 100 is a safety bound.
    for _ in range(100):
        # RULE 1: Cap single at 22.5%
        excess = 0
        for i in range(len(df_caps)):
            if df_caps.loc[i, 'Weight'] > 0.225:
                excess += df_caps.loc[i, 'Weight'] - 0.225
                df_caps.loc[i, 'Weight'] = 0.225
        if excess > 0:
            # Redistribute the clipped weight pro-rata across uncapped names.
            uncapped_mask = df_caps['Weight'] < 0.225
            df_caps.loc[uncapped_mask, 'Weight'] += excess * (df_caps.loc[uncapped_mask, 'Weight'] / df_caps.loc[uncapped_mask, 'Weight'].sum())
        # RULE 2: Group > 5% must be <= 45%
        # NOTE(review): the mask uses 0.048 rather than 0.05 — presumably a
        # deliberate buffer just below the 5% line; confirm against methodology.
        large_weights_mask = df_caps['Weight'] > 0.048
        sum_large = df_caps.loc[large_weights_mask, 'Weight'].sum()
        if sum_large > 0.45:
            excess_large = sum_large - 0.45
            # Scale the large group down to exactly 45% and hand the freed
            # weight to the small group, pro-rata.
            df_caps.loc[large_weights_mask, 'Weight'] *= (0.45 / sum_large)
            small_weights_mask = ~large_weights_mask
            df_caps.loc[small_weights_mask, 'Weight'] += excess_large * (df_caps.loc[small_weights_mask, 'Weight'] / df_caps.loc[small_weights_mask, 'Weight'].sum())
        # Exit once the hard 25% single / 50% group limits are both satisfied.
        if df_caps['Weight'].max() <= 0.25 and df_caps[df_caps['Weight'] > 0.05]['Weight'].sum() <= 0.50:
            break
    return df_caps
def get_spanish_tickers():
    """Scrape IBEX 35 constituents from Wikipedia, suffixed .MC for Yahoo.

    Falls back to a hard-coded list of major Spanish names when the scrape
    fails or no table yields enough plausible tickers.
    """
    url = "https://en.wikipedia.org/wiki/IBEX_35"
    request = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    try:
        with urllib.request.urlopen(request) as response:
            for table in pd.read_html(response):
                if 'Ticker' in table.columns:
                    col = 'Ticker'
                elif 'Symbol' in table.columns:
                    col = 'Symbol'
                else:
                    continue
                candidates = table[col].astype(str).str.strip()
                # Real tickers are short and uppercase; this drops header and
                # metadata strings (e.g. 'EXCHANGE') that leak into the column.
                symbols = [s for s in candidates if len(s) <= 5 and s.isupper()]
                if len(symbols) > 20:  # IBEX has 35 members
                    return [s + ".MC" for s in symbols]
    except Exception as e:
        print(f"Scrape failed: {e}")
    # Solid fallback list of major Spanish tickers
    return ["SAN.MC", "IBE.MC", "BBVA.MC", "ITX.MC", "TEF.MC", "REP.MC", "CABK.MC", "FER.MC", "AMS.MC", "SAB.MC"]
# --- EXECUTION ---
# Phase 1: fetch tickers and market caps; Phase 2: apply 25/50 capping;
# Phase 3: download prices and export both prices and final weights.
print("Phase 1: Fetching and Cleaning Tickers...")
raw_tickers = get_spanish_tickers()
cap_data = []
for t in raw_tickers:
    try:
        stock = yf.Ticker(t)
        mcap = stock.info.get('marketCap')
        if mcap and mcap > 0:
            cap_data.append({'Ticker': t, 'MarketCap': mcap})
            print(f"Found: {t}")
    except:
        # Best-effort: a failed symbol is simply dropped from the universe.
        continue
if not cap_data:
    print("Error: No market cap data found. Check your internet connection.")
else:
    df_caps = pd.DataFrame(cap_data)
    print("\nPhase 2: Applying 25/50 Capping Logic...")
    df_final = apply_25_50_capping(df_caps)
    # Report how concentrated the capped index ends up.
    print("\n--- CONCENTRATION CHECK ---")
    print(f"Max Weight: {df_final['Weight'].max()*100:.2f}%")
    print(f"Top Group (>5%) Sum: {df_final[df_final['Weight'] > 0.05]['Weight'].sum()*100:.2f}%")
    print("\nPhase 3: Downloading Price Data...")
    ordered_list = df_final['Ticker'].tolist()
    data = yf.download(ordered_list, start="1976-01-01", end="2026-03-19", auto_adjust=True)
    if not data.empty:
        # Save results
        # NOTE(review): unlike the other country scripts, this indexes
        # data['Close'][ordered_list] without filtering to columns that
        # actually downloaded — a failed symbol would raise KeyError here.
        final_prices = data['Close'][ordered_list].transpose()
        final_prices.to_csv("MSCI_Spain_Cleaned_2550.csv")
        df_final[['Ticker', 'Weight']].to_csv("Spain_Weights_Final.csv", index=False)
        print(f"\nSuccess! Exported to 'MSCI_Spain_Cleaned_2550.csv'")
    else:
        print("Failed to download price data.")
import pandas as pd
import yfinance as yf
import ssl
import urllib.request
# --- BYPASS SSL CERTIFICATE ISSUES ---
ssl._create_default_https_context = ssl._create_unverified_context
def get_taiwan_tickers():
    """Merge scraped FTSE TWSE Taiwan 50 tickers with a manual MSCI Taiwan list.

    Scraped symbols get a .TW (Taiwan Stock Exchange) suffix unless they
    already end in .TW or .TWO. Duplicates are removed; the resulting order
    is unspecified (set-based union). The scrape is best-effort: on failure
    only the manual list is returned.
    """
    url = "https://en.wikipedia.org/wiki/FTSE_TWSE_Taiwan_50_Index"
    request = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    candidate_cols = ('Ticker', 'Symbol', 'Stock code')
    scraped = []
    try:
        with urllib.request.urlopen(request) as response:
            tables = pd.read_html(response)
        constituents = next(
            (tbl for tbl in tables if any(c in tbl.columns for c in candidate_cols)),
            None,
        )
        if constituents is not None:
            col = next(c for c in constituents.columns if c in candidate_cols)
            for raw in constituents[col].astype(str):
                symbol = raw.strip()
                # Default to the main-board suffix when none is present.
                if not (symbol.endswith('.TW') or symbol.endswith('.TWO')):
                    symbol += '.TW'
                scraped.append(symbol)
    except Exception as e:
        print(f"Scrape failed (Index page might have moved): {e}")
    # Your manual MSCI Taiwan list (approx. 90 tickers)
    manual_list = [
        "2330.TW", "2317.TW", "2454.TW", "2308.TW", "2881.TW", "2382.TW", "2882.TW", "2412.TW", "2303.TW", "3711.TW",
        "2886.TW", "2357.TW", "1301.TW", "2891.TW", "2002.TW", "2301.TW", "1303.TW", "5880.TW", "2884.TW", "2892.TW",
        "1216.TW", "2327.TW", "2885.TW", "3008.TW", "2408.TW", "2603.TW", "5871.TW", "2379.TW", "2880.TW", "3045.TW",
        "2912.TW", "2883.TW", "1326.TW", "4904.TW", "2395.TW", "2609.TW", "2887.TW", "1101.TW", "2345.TW", "1605.TW",
        "2615.TW", "3231.TW", "4938.TW", "2377.TW", "2409.TW", "5876.TW", "3034.TW", "2352.TW", "1402.TW", "2105.TW",
        "9910.TW", "2356.TW", "2801.TW", "2618.TW", "3037.TW", "6505.TW", "1504.TW", "2049.TW", "2313.TW", "2360.TW",
        "2474.TW", "2890.TW", "3017.TW", "3661.TW", "6669.TW", "1513.TW", "1519.TW", "2376.TW", "2383.TW", "2449.TW",
        "3035.TW", "3443.TW", "3532.TW", "4763.TW", "4958.TW", "4966.TW", "5269.TW", "5274.TW", "6239.TW", "6415.TW",
        "8046.TW", "8454.TW", "9921.TW", "9945.TW", "2353.TW", "2324.TW", "2633.TW", "1102.TW"
    ]
    return list(set(scraped + manual_list))
# --- PHASE 1: GET TICKERS & VALIDATE MARKET CAPS ---
tickers = get_taiwan_tickers()
if not tickers:
    print("CRITICAL: No tickers were retrieved.")
    exit()
print(f"Phase 1: Validating {len(tickers)} Taiwan Tickers and fetching Market Caps...")
cap_list = []
for t in tickers:
    try:
        stock = yf.Ticker(t)
        # TWD is the currency for Taiwan stocks
        m_cap = stock.info.get('marketCap', 0)
        currency = stock.info.get('currency', 'TWD')
        if m_cap > 0:
            cap_list.append({'Ticker': t, 'MarketCap': m_cap, 'Currency': currency})
            print(f"Success: {t} | Cap: {m_cap:,} {currency}")
        else:
            # Symbol resolved but reported no cap; dropped from the export.
            print(f"Verified (No Cap data): {t}")
    except Exception:
        # Network/lookup failure for this one symbol; skip it and continue.
        print(f"FAILED fetch for: {t}")
# Sort by Market Cap
sorted_df = pd.DataFrame(cap_list)
if not sorted_df.empty:
    sorted_df = sorted_df.sort_values(by='MarketCap', ascending=False)
    ordered_tickers = sorted_df[sorted_df['MarketCap'] > 0]['Ticker'].tolist()
else:
    print("No valid MarketCap data found.")
    exit()
# --- PHASE 2: DOWNLOAD HISTORICAL DATA ---
print(f"\nPhase 2: Downloading Daily Data for {len(ordered_tickers)} stocks...")
start = "1976-01-01"
end = "2026-03-21"
data = yf.download(ordered_tickers, start=start, end=end, auto_adjust=True)
# With multiple tickers yf.download returns MultiIndex columns (field, ticker).
if isinstance(data.columns, pd.MultiIndex):
    close_data = data['Close']
else:
    close_data = data
# Filter valid and reorder
valid_tickers = [t for t in ordered_tickers if t in close_data.columns]
close_data = close_data[valid_tickers]
# --- PHASE 3: EXPORT ---
# Wide format: rows = tickers, columns = ISO-formatted dates.
df_wide = close_data.transpose()
df_wide.columns = pd.to_datetime(df_wide.columns).strftime('%Y-%m-%d')
file_name = f"Taiwan_MSCI_MarketCap_Sorted_{len(valid_tickers)}.csv"
df_wide.to_csv(file_name)
print("-" * 30)
print(f"Done! Exported {len(valid_tickers)} stocks to {file_name}")
#The two that failed (5274.TW - ASPEED and 4966.TW - Parade Technologies) are common "gotchas" in Taiwan data. Both are primarily listed on the Taipei Exchange (TPEx) rather than the main Taiwan Stock Exchange. On Yahoo Finance, they often require the .TWO suffix instead of .TW. If you ever need those specific semiconductor mid-caps for completeness, just swapping the suffix usually does the trick.
import pandas as pd
import yfinance as yf
import ssl
import numpy as np
# --- BYPASS SSL CERTIFICATE ISSUES ---
ssl._create_default_https_context = ssl._create_unverified_context
def get_mexico_tickers_full():
    """Return the hand-maintained S&P/BMV IPC constituent tickers.

    These names form the core of the MSCI Mexico 25/50 index. Every symbol
    carries the .MX suffix for Yahoo Finance.
    """
    components = (
        "AMXB.MX", "WALMEX.MX", "FEMSAUBD.MX", "GFNORTEO.MX", "GMEXICOB.MX",
        "CEMEXCPO.MX", "BIMBOA.MX", "ASURB.MX", "GAPB.MX", "OMAB.MX",
        "GFINBURO.MX", "GRUMAB.MX", "AC.MX", "KOFUBL.MX", "KIMBERA.MX",
        "PINFRA.MX", "ORBIA.MX", "TLEVISACPO.MX", "MEGACPO.MX", "GCC.MX",
        "ALFAA.MX", "ALPEKA.MX", "VESTA.MX", "BOLSAA.MX", "RA.MX", "LABB.MX",
        "GENTERA.MX", "PE&OLES.MX", "CUERVO.MX", "Q.MX", "CHDRAUIB.MX",
    )
    # Hand back a fresh list so callers can mutate it safely.
    return list(components)
# --- PHASE 1: GET TICKERS & VALIDATE MARKET CAPS ---
print("Phase 1: Validating Mexico Ticker List (IPC Components)...")
tickers = get_mexico_tickers_full()
cap_data = []
for t in tickers:
    try:
        stock = yf.Ticker(t)
        # Fetching Market Cap in MXN
        mcap = stock.info.get('marketCap', 0)
        if mcap > 0:
            cap_data.append({'Ticker': t, 'MarketCap': mcap})
            print(f"Added: {t} | {mcap:,.0f} MXN")
        else:
            # Fallback for tickers with missing info field
            print(f"No direct Cap data for {t}, calculating manually...")
            hist = stock.history(period="1d")
            # Empty-history guard prevents IndexError on .iloc[-1] below.
            if not hist.empty:
                shares = stock.info.get('sharesOutstanding', 0)
                mcap = hist['Close'].iloc[-1] * shares
                if mcap > 0:
                    cap_data.append({'Ticker': t, 'MarketCap': mcap})
                    print(f"Added (Manual Calc): {t}")
    except Exception as e:
        # Best-effort: a failed symbol is logged and dropped from the universe.
        print(f"Skipping {t} due to error: {e}")
        continue
if not cap_data:
    print("CRITICAL: No Market Cap data found. Check your internet connection.")
    exit()
# Sort by Market Cap (Highest to Lowest)
df_caps = pd.DataFrame(cap_data).sort_values('MarketCap', ascending=False)
ordered_tickers = df_caps['Ticker'].tolist()
# --- PHASE 2: DOWNLOAD HISTORICAL DATA ---
print(f"\nPhase 2: Downloading Daily Data for {len(ordered_tickers)} stocks...")
# Start date set to your requested 1976 baseline
start_date = "1976-01-01"
end_date = "2026-03-19"
data = yf.download(ordered_tickers, start=start_date, end=end_date, auto_adjust=True)
# Handle MultiIndex and extract Close prices
if isinstance(data.columns, pd.MultiIndex):
    close_data = data['Close']
else:
    close_data = data
# Filter only tickers that successfully returned data
valid_final_tickers = [t for t in ordered_tickers if t in close_data.columns]
close_data = close_data[valid_final_tickers]
# --- PHASE 3: EXPORT ---
# Transpose for your target format: Rows = Tickers, Columns = Dates
df_wide = close_data.transpose()
df_wide.columns = pd.to_datetime(df_wide.columns).strftime('%Y-%m-%d')
file_name = f"Mexico_MSCI_2550_Sorted_{len(valid_final_tickers)}.csv"
df_wide.to_csv(file_name)
print("-" * 30)
print(f"Success! Exported {len(valid_final_tickers)} Mexican stocks to {file_name}")
import pandas as pd
import yfinance as yf
import ssl
import urllib.request # <--- This was the missing piece!
# --- BYPASS SSL CERTIFICATE ISSUES ---
ssl._create_default_https_context = ssl._create_unverified_context
def get_ftse100_tickers():
    """Scrape FTSE 100 constituents from Wikipedia, formatted for Yahoo (.L).

    Dots inside EPIC codes become dashes (e.g. BT.A -> BT-A.L). Returns an
    empty list when the page cannot be fetched or parsed.
    """
    url = "https://en.wikipedia.org/wiki/FTSE_100_Index"
    # A browser User-Agent avoids 403 Forbidden responses from Wikipedia.
    request = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    try:
        with urllib.request.urlopen(request) as response:
            tables = pd.read_html(response)
        # The constituents list is usually the largest table on the page.
        constituents = max(tables, key=len)
        # Wikipedia renames this column occasionally; probe the known names
        # and fall back to the first column.
        ticker_col = constituents.columns[0]
        for name in ('EPIC', 'Ticker', 'Symbol', 'Stock symbol', 'Ticker symbol'):
            if name in constituents.columns:
                ticker_col = name
                break
        print(f"Found ticker column: {ticker_col}")
        symbols = constituents[ticker_col].astype(str)
        return [s.replace('.', '-') + ".L" for s in symbols]
    except Exception as e:
        print(f"Error fetching FTSE list: {e}")
        return []
# --- PHASE 1: GET TICKERS & VALIDATE WEIGHTS ---
tickers = get_ftse100_tickers()
if not tickers:
    print("CRITICAL: No tickers were retrieved. Check internet or SSL settings.")
    exit()
print(f"Phase 1: Validating {len(tickers)} FTSE 100 Tickers and fetching Market Caps...")
cap_list = []
for t in tickers:
    try:
        # Note: This part takes time as it calls the API for each stock
        stock = yf.Ticker(t)
        info = stock.info
        m_cap = info.get('marketCap', 0)
        currency = info.get('currency', 'GBP')
        cap_list.append({'Ticker': t, 'MarketCap': m_cap, 'Currency': currency})
        print(f"Success: {t} | Cap: {m_cap:,} {currency}")
    except Exception:
        # Keep a zero-cap placeholder; zero-cap rows are filtered out below.
        print(f"FAILED info fetch for: {t}")
        cap_list.append({'Ticker': t, 'MarketCap': 0, 'Currency': 'N/A'})
# Sort by Market Cap to approximate index weighting
sorted_df = pd.DataFrame(cap_list)
if not sorted_df.empty and 'MarketCap' in sorted_df.columns:
    sorted_df = sorted_df.sort_values(by='MarketCap', ascending=False)
    ordered_tickers = sorted_df[sorted_df['MarketCap'] > 0]['Ticker'].tolist()
else:
    print("No valid MarketCap data found.")
    exit()
# --- PHASE 2: DOWNLOAD HISTORICAL DATA ---
print(f"\nPhase 2: Downloading Daily Data for {len(ordered_tickers)} stocks...")
start = "1976-01-01"
end = "2026-03-08"
# Bulk download historical prices
data = yf.download(ordered_tickers, start=start, end=end, auto_adjust=True)
# Handle MultiIndex if necessary and extract Close prices
if isinstance(data.columns, pd.MultiIndex):
    close_data = data['Close']
else:
    close_data = data
# FIX: select only tickers that actually returned a price column, keeping the
# market-cap order. Indexing with the full ordered list raised KeyError
# whenever any symbol failed to download (the sibling scripts already filter).
valid_tickers = [t for t in ordered_tickers if t in close_data.columns]
close_data = close_data[valid_tickers]
# --- PHASE 3: EXPORT ---
# Transpose so rows are Tickers and columns are Dates
df_wide = close_data.transpose()
df_wide.columns = pd.to_datetime(df_wide.columns).strftime('%Y-%m-%d')
file_name = "FTSE100_MarketCap_Sorted.csv"
df_wide.to_csv(file_name)
print("-" * 30)
print(f"Done! Exported {len(valid_tickers)} stocks to {file_name}")
import pandas as pd
import yfinance as yf
import ssl
import urllib.request
import io
# --- BYPASS SSL CERTIFICATE ISSUES ---
ssl._create_default_https_context = ssl._create_unverified_context
def get_indonesia_complete_list():
    """Build a combined LQ45 + EIDO/MSCI Indonesia ticker universe.

    Scraped Wikipedia codes sometimes carry an 'IDX:' prefix, which is
    stripped; every symbol ends up with the .JK (Jakarta) suffix. The result
    is de-duplicated, so its order is unspecified. The scrape is best-effort:
    on failure only the manual ETF additions are returned.
    """
    url = "https://en.wikipedia.org/wiki/LQ45"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)'}
    scraped = []
    try:
        request = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(request) as response:
            payload = io.BytesIO(response.read())
        tables = pd.read_html(payload)
        # Pick the first table whose columns mention 'Ticker' or 'Code'.
        table = next(
            t for t in tables
            if any('Ticker' in str(col) or 'Code' in str(col) for col in t.columns)
        )
        code_col = next(
            col for col in table.columns
            if 'Ticker' in str(col) or 'Code' in str(col)
        )
        scraped = table[code_col].astype(str).tolist()
    except Exception as e:
        print(f"Auto-fetch failed: {e}")
    # Manual Additions for ETF coverage (EIDO/MSCI)
    etf_additions = [
        "BBCA", "BBRI", "BMRI", "TLKM", "ASII", "BBNI", "BYAN", "AMMN", "TPIA",
        "MAPI", "MAPA", "MYOR", "CPIN", "EXCL", "ISAT", "BUKA", "ESSA", "MEDC",
        "HRUM", "CTRA", "PWON", "BSDE", "SMRA", "JSMR", "AVIA", "BELI", "MTEL", "TOWR",
        "ADRO", "ANTM", "INCO", "UNTR", "KLBF", "UNVR", "ICBP", "INDF", "INKP"
    ]
    # --- CLEANING STEP: Remove 'IDX:' and add '.JK' ---
    sanitized = set()
    for raw in scraped + etf_additions:
        symbol = raw.upper().replace("IDX:", "").strip()
        if not symbol.endswith(".JK"):
            symbol += ".JK"
        sanitized.add(symbol)
    return list(sanitized)
# --- PHASE 1: GET TICKERS & VALIDATE ---
tickers = get_indonesia_complete_list()
print(f"Phase 1: Validating {len(tickers)} Indonesia Tickers (Sanitized)...")
cap_list = []
for t in tickers:
    try:
        stock = yf.Ticker(t)
        m_cap = stock.info.get('marketCap', 0)
        if m_cap > 0:
            cap_list.append({'Ticker': t, 'MarketCap': m_cap})
            print(f"Success: {t} | Cap: Rp{m_cap:,.0f}")
        else:
            # Fallback for stocks that have data but no 'info' cap
            hist = stock.history(period="1d")
            if not hist.empty:
                # Kept with cap 0, so it sorts to the bottom but is exported.
                cap_list.append({'Ticker': t, 'MarketCap': 0})
                print(f"Success (History Only): {t}")
    except:
        # Best-effort: a failed symbol is simply dropped from the universe.
        continue
# Sort by Market Cap
sorted_df = pd.DataFrame(cap_list).sort_values(by='MarketCap', ascending=False)
ordered_tickers = sorted_df['Ticker'].tolist()
# --- PHASE 2: DOWNLOAD HISTORICAL DATA ---
print(f"\nPhase 2: Downloading Daily Data for {len(ordered_tickers)} stocks...")
start, end = "1976-01-01", "2026-03-21"
data = yf.download(ordered_tickers, start=start, end=end, auto_adjust=True)
# With multiple tickers yf.download returns MultiIndex columns (field, ticker).
close_data = data['Close'] if isinstance(data.columns, pd.MultiIndex) else data
# --- PHASE 3: EXPORT ---
# Wide format: rows = tickers, columns = ISO-formatted dates.
# NOTE(review): unlike the sibling scripts, close_data is not filtered down to
# ordered_tickers here, so the count in the filename may not match the actual
# number of exported rows — confirm intended.
df_wide = close_data.transpose()
df_wide.columns = pd.to_datetime(df_wide.columns).strftime('%Y-%m-%d')
file_name = f"Indonesia_ETF_Full_Sanitized_{len(ordered_tickers)}.csv"
df_wide.to_csv(file_name)
print("-" * 30)
print(f"Done! Exported {len(ordered_tickers)} stocks to {file_name}")
import pandas as pd
import yfinance as yf
import ssl
import urllib.request
# --- BYPASS SSL CERTIFICATE ISSUES ---
# WARNING(review): this disables TLS certificate verification for every
# HTTPS request in the process — insecure in general; tolerable only for
# this local, read-only data pull. Remove once the local cert store is fixed.
ssl._create_default_https_context = ssl._create_unverified_context
def get_italy_tickers():
    """
    Return FTSE MIB (Italy) tickers for Yahoo Finance.

    Scrapes the Wikipedia constituents table and applies 'Smart Suffix'
    logic so symbols carry exactly one .MI (Milan) suffix, then merges a
    manual fallback list, de-duplicated.
    """
    url = "https://en.wikipedia.org/wiki/FTSE_MIB"
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    candidate_cols = ['Ticker', 'Ticker symbol', 'Symbol']
    scraped_tickers = []
    try:
        with urllib.request.urlopen(req) as response:
            tables = pd.read_html(response)
        # Pick the first parsed table exposing a recognised ticker column.
        df = next(
            (tbl for tbl in tables
             if any(col in tbl.columns for col in candidate_cols)),
            None,
        )
        if df is not None:
            ticker_col = next(col for col in df.columns if col in candidate_cols)
            for raw in df[ticker_col].astype(str):
                symbol = raw.strip()
                # Append .MI only when it is not already present.
                if not symbol.endswith('.MI'):
                    symbol = f"{symbol}.MI"
                scraped_tickers.append(symbol)
    except Exception as e:
        print(f"Scrape failed: {e}")
    # Refined manual list (Corrected BPE and removed redundancy)
    manual_list = [
        "ENI.MI", "ENEL.MI", "ISP.MI", "UCG.MI", "RACE.MI", "STLAM.MI", "STMMI.MI",
        "G.MI", "MONC.MI", "PRY.MI", "TRN.MI", "SRG.MI", "PST.MI", "TEN.MI",
        "LDO.MI", "MB.MI", "FBK.MI", "AMP.MI", "REC.MI", "DIA.MI", "NEXI.MI",
        "BAMI.MI", "BPE.MI", "TIT.MI", "AZM.MI", "SPM.MI", "INW.MI", "ERG.MI",
        "ITM.MI", "PIRC.MI", "BRE.MI"
    ]
    # Union of scraped + manual, duplicates removed.
    return list(set(scraped_tickers + manual_list))
# --- PHASE 1: GET TICKERS & VALIDATE MARKET CAPS ---
tickers = get_italy_tickers()
if not tickers:
    print("CRITICAL: No tickers were retrieved.")
    exit()
print(f"Phase 1: Validating {len(tickers)} Italy Tickers and fetching Market Caps...")

cap_list = []
for symbol in tickers:
    try:
        info = yf.Ticker(symbol).info
        market_cap = info.get('marketCap', 0)
        currency = info.get('currency', 'EUR')
        if market_cap > 0:
            cap_list.append({'Ticker': symbol, 'MarketCap': market_cap, 'Currency': currency})
            print(f"Success: {symbol} | Cap: {market_cap:,} {currency}")
    except Exception:
        print(f"FAILED info fetch for: {symbol}")

# Sort by Market Cap (largest first); bail out when nothing validated.
sorted_df = pd.DataFrame(cap_list)
if sorted_df.empty:
    print("No valid MarketCap data found.")
    exit()
ordered_tickers = (
    sorted_df.sort_values(by='MarketCap', ascending=False)['Ticker'].tolist()
)

# --- PHASE 2: DOWNLOAD HISTORICAL DATA ---
print(f"\nPhase 2: Downloading Daily Data for {len(ordered_tickers)} stocks...")
start = "1976-01-01"
end = "2026-03-21"  # Current date
data = yf.download(ordered_tickers, start=start, end=end, auto_adjust=True)
# Multi-ticker downloads come back with a (field, ticker) MultiIndex.
close_data = data['Close'] if isinstance(data.columns, pd.MultiIndex) else data

# Keep only tickers that actually returned data, in market-cap order.
valid_tickers = [t for t in ordered_tickers if t in close_data.columns]
close_data = close_data[valid_tickers]

# --- PHASE 3: EXPORT --- (tickers as rows, dates as columns)
df_wide = close_data.transpose()
df_wide.columns = pd.to_datetime(df_wide.columns).strftime('%Y-%m-%d')
file_name = f"Italy_MarketCap_Sorted_{len(valid_tickers)}.csv"
df_wide.to_csv(file_name)
print("-" * 30)
print(f"Done! Exported {len(valid_tickers)} stocks to {file_name}")
import pandas as pd
import yfinance as yf
import ssl
import urllib.request
# --- BYPASS SSL CERTIFICATE ISSUES ---
# WARNING(review): disables TLS certificate verification process-wide;
# insecure in general, acceptable only for this local read-only data pull.
ssl._create_default_https_context = ssl._create_unverified_context
def get_portugal_tickers():
    """
    Return PSI (Portugal) constituent tickers for Yahoo Finance.

    Scrapes the Wikipedia PSI-20 constituents table and normalises each
    symbol (upper-cased, exactly one .LS Lisbon suffix), then merges a
    manual fallback list, de-duplicated.
    """
    url = "https://en.wikipedia.org/wiki/PSI_20"
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    candidate_cols = ['Ticker', 'Symbol', 'Ticker symbol']
    scraped_tickers = []
    try:
        with urllib.request.urlopen(req) as response:
            tables = pd.read_html(response)
        # Pick the first parsed table exposing a recognised ticker column.
        df = next(
            (tbl for tbl in tables
             if any(col in tbl.columns for col in candidate_cols)),
            None,
        )
        if df is not None:
            ticker_col = next(col for col in df.columns if col in candidate_cols)
            for raw in df[ticker_col].astype(str):
                symbol = raw.strip().upper()
                # Guarantee a single .LS suffix.
                if not symbol.endswith('.LS'):
                    symbol = f"{symbol}.LS"
                scraped_tickers.append(symbol)
    except Exception as e:
        print(f"Scrape failed: {e}")
    # Your manual list from the image (16 tickers)
    manual_list = [
        "ALTR.LS", "BCP.LS", "COR.LS", "CTT.LS", "EDP.LS", "EDPR.LS",
        "GALP.LS", "IBS.LS", "JMT.LS", "EGL.LS", "NOS.LS", "NVG.LS",
        "PHR.LS", "RENE.LS", "SEM.LS", "SON.LS"
    ]
    # Union of scraped + manual, duplicates removed.
    return list(set(scraped_tickers + manual_list))
# --- PHASE 1: GET TICKERS & VALIDATE MARKET CAPS ---
tickers = get_portugal_tickers()
if not tickers:
    print("CRITICAL: No tickers were retrieved.")
    exit()
print(f"Phase 1: Validating {len(tickers)} Portugal Tickers and fetching Market Caps...")

cap_list = []
for symbol in tickers:
    try:
        info = yf.Ticker(symbol).info
        market_cap = info.get('marketCap', 0)
        currency = info.get('currency', 'EUR')
        # Note: Pharl (PHR.LS) sometimes returns 0 cap if suspended/OTC
        if market_cap > 0:
            cap_list.append({'Ticker': symbol, 'MarketCap': market_cap, 'Currency': currency})
            print(f"Success: {symbol} | Cap: {market_cap:,} {currency}")
        else:
            # Record with 0 so it stays in the dataframe, sorted to the bottom.
            cap_list.append({'Ticker': symbol, 'MarketCap': 0, 'Currency': currency})
            print(f"Verified (No Cap): {symbol}")
    except Exception:
        print(f"FAILED info fetch for: {symbol}")

# Sort by Market Cap (largest first); bail out when nothing validated.
sorted_df = pd.DataFrame(cap_list)
if sorted_df.empty:
    print("No valid data found.")
    exit()
sorted_df = sorted_df.sort_values(by='MarketCap', ascending=False)
# Zero-cap entries are dropped from the final ordered list to keep the data clean.
ordered_tickers = sorted_df.loc[sorted_df['MarketCap'] > 0, 'Ticker'].tolist()

# --- PHASE 2: DOWNLOAD HISTORICAL DATA ---
print(f"\nPhase 2: Downloading Daily Data for {len(ordered_tickers)} stocks...")
start, end = "1976-01-01", "2026-03-21"
data = yf.download(ordered_tickers, start=start, end=end, auto_adjust=True)
# Multi-ticker downloads come back with a (field, ticker) MultiIndex.
close_data = data['Close'] if isinstance(data.columns, pd.MultiIndex) else data

# Keep only tickers that actually returned data, in market-cap order.
valid_tickers = [t for t in ordered_tickers if t in close_data.columns]
close_data = close_data[valid_tickers]

# --- PHASE 3: EXPORT --- (tickers as rows, dates as columns)
df_wide = close_data.transpose()
df_wide.columns = pd.to_datetime(df_wide.columns).strftime('%Y-%m-%d')
file_name = f"Portugal_PSI_MarketCap_Sorted_{len(valid_tickers)}.csv"
df_wide.to_csv(file_name)
print("-" * 30)
print(f"Done! Exported {len(valid_tickers)} stocks to {file_name}")
import pandas as pd
import yfinance as yf
import ssl
import urllib.request
import io
# --- BYPASS SSL CERTIFICATE ISSUES ---
# WARNING(review): disables TLS certificate verification process-wide;
# insecure in general, acceptable only for this local read-only data pull.
ssl._create_default_https_context = ssl._create_unverified_context
def get_nasdaq_100_tickers():
    """
    Fetch the Nasdaq-100 constituent tickers from Wikipedia.

    Sends a browser-like User-Agent header to prevent 403 Forbidden
    errors, then searches every parsed table for a recognised ticker
    column. Accepting 'Ticker' / 'Ticker symbol' / 'Symbol' (the same
    candidates the Italy and Portugal scrapers use) means a Wikipedia
    column rename no longer silently breaks this fetch.

    Returns:
        list[str]: Ticker symbols, or an empty list on any failure.
    """
    url = "https://en.wikipedia.org/wiki/Nasdaq-100"
    # Adding a header to mimic a standard browser
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    candidate_cols = ['Ticker', 'Ticker symbol', 'Symbol']
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req) as response:
            content = response.read()
        # Parse the HTML content for the constituents table; the table
        # index shifts over time, so locate it by column name instead.
        tables = pd.read_html(io.BytesIO(content))
        for t in tables:
            for col in candidate_cols:
                if col in t.columns:
                    return t[col].tolist()
        print("Could not find the ticker table in Wikipedia.")
        return []
    except Exception as e:
        print(f"Automated Wikipedia fetch failed: {e}")
        return []
# --- PHASE 1: GET TICKERS & VALIDATE ---
tickers = get_nasdaq_100_tickers()
if not tickers:
    print("CRITICAL: No tickers were retrieved.")
    exit()
print(f"Phase 1: Validating {len(tickers)} Nasdaq-100 Tickers...")

cap_list = []
for symbol in tickers:
    try:
        # Market Cap is in USD for Nasdaq stocks
        market_cap = yf.Ticker(symbol).info.get('marketCap', 0)
        if market_cap > 0:
            cap_list.append({'Ticker': symbol, 'MarketCap': market_cap})
            print(f"Success: {symbol} | Cap: ${market_cap:,.0f}")
        else:
            print(f"Verified (No Cap data): {symbol}")
    except Exception:
        print(f"FAILED fetch for: {symbol}")

# Sort by Market Cap (Highest to Lowest); bail out when nothing validated.
sorted_df = pd.DataFrame(cap_list)
if sorted_df.empty:
    print("No valid data found.")
    exit()
ordered_tickers = (
    sorted_df.sort_values(by='MarketCap', ascending=False)['Ticker'].tolist()
)

# --- PHASE 2: DOWNLOAD HISTORICAL DATA ---
print(f"\nPhase 2: Downloading Daily Data for {len(ordered_tickers)} stocks...")
start, end = "1976-01-01", "2026-03-21"
# Bulk download (Threads=True for speed)
data = yf.download(ordered_tickers, start=start, end=end, auto_adjust=True)
# Select 'Close' prices (multi-ticker results use a MultiIndex).
close_data = data['Close'] if isinstance(data.columns, pd.MultiIndex) else data

# Ensure we only export tickers that actually returned data
valid_tickers = [t for t in ordered_tickers if t in close_data.columns]
close_data = close_data[valid_tickers]

# --- PHASE 3: EXPORT ---
# Transpose so tickers are rows and dates are columns
df_wide = close_data.transpose()
df_wide.columns = pd.to_datetime(df_wide.columns).strftime('%Y-%m-%d')
file_name = f"Nasdaq100_MarketCap_Sorted_{len(valid_tickers)}.csv"
df_wide.to_csv(file_name)
print("-" * 30)
print(f"Done! Exported {len(valid_tickers)} Nasdaq-100 stocks to {file_name}")
import pandas as pd
import yfinance as yf
import ssl
import io
import urllib.request
# --- BYPASS SSL CERTIFICATE ISSUES ---
# WARNING(review): disables TLS certificate verification process-wide;
# insecure in general, acceptable only for this local read-only data pull.
ssl._create_default_https_context = ssl._create_unverified_context
def get_china_mchi_tickers():
    """
    Build a China ticker universe from the iShares MCHI holdings CSV.

    Downloads the detailed holdings file from the iShares MCHI page and
    maps each holding to its Yahoo Finance symbol (.HK, .SS, .SZ, or a
    bare US ADR ticker). Falls back to a manual heavyweight list if the
    download or parse fails. The result is de-duplicated.
    """
    # Direct CSV download endpoint for the MCHI holdings file.
    csv_url = "https://www.ishares.com/us/products/239619/ishares-msci-china-etf/1467271812596.ajax?fileType=csv&fileName=MCHI_holdings&dataType=fund"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'}
    req = urllib.request.Request(csv_url, headers=headers)
    ticker_list = []
    try:
        with urllib.request.urlopen(req) as response:
            content = response.read().decode('utf-8')
        # iShares CSVs often carry metadata in the first 9 rows.
        df = pd.read_csv(io.StringIO(content), skiprows=9)
        if 'Ticker' in df.columns and 'Exchange' in df.columns:
            for raw_ticker, raw_exchange in zip(df['Ticker'], df['Exchange']):
                ticker = str(raw_ticker).strip()
                exchange = str(raw_exchange).strip()
                # Exchange-specific mapping to Yahoo Finance symbols.
                if exchange == "Hong Kong Stock Exchange":
                    # HK symbols are zero-padded to 4 digits (700 -> 0700.HK).
                    ticker_list.append(ticker.zfill(4) + ".HK")
                elif exchange == "Shanghai Stock Exchange":
                    ticker_list.append(ticker + ".SS")
                elif exchange == "Shenzhen Stock Exchange":
                    ticker_list.append(ticker + ".SZ")
                elif exchange in ("NASDAQ", "New York Stock Exchange Inc."):
                    # US-listed ADRs need no suffix.
                    ticker_list.append(ticker)
                # Any other exchange (cash, futures, ...) is skipped.
    except Exception as e:
        print(f"Automated fetch failed: {e}. Falling back to manual heavyweights...")
        # Manual fallback for the top 15 most important names
        ticker_list = [
            "0700.HK", "BABA", "0939.HK", "1810.HK", "PDD", "1398.HK",
            "2318.HK", "3690.HK", "1211.HK", "3988.HK", "NTES", "BIDU",
            "2628.HK", "JD", "600519.SS"
        ]
    return list(set(ticker_list))
# --- PHASE 1: GET TICKERS & VALIDATE ---
tickers = get_china_mchi_tickers()
print(f"Phase 1: Validating {len(tickers)} China Tickers...")

cap_list = []
for t in tickers:
    try:
        # Note: China A-Shares (SS/SZ) can be slow to respond to bulk info requests
        stock = yf.Ticker(t)
        info = stock.info
        m_cap = info.get('marketCap', 0)
        currency = info.get('currency', 'HKD')  # Default to HKD/USD/CNY
        if m_cap > 0:
            cap_list.append({'Ticker': t, 'MarketCap': m_cap, 'Currency': currency})
            print(f"Success: {t} | Cap: {m_cap:,} {currency}")
    except Exception:
        pass  # Skip failed fetches quietly to keep console clean

# Sort by Market Cap
sorted_df = pd.DataFrame(cap_list)
if not sorted_df.empty:
    sorted_df = sorted_df.sort_values(by='MarketCap', ascending=False)
    ordered_tickers = sorted_df['Ticker'].tolist()
else:
    print("No valid data found.")
    exit()

# --- PHASE 2: DOWNLOAD HISTORICAL DATA ---
# China data availability on Yahoo varies: HK is good back to the 90s, A-Shares vary.
print(f"\nPhase 2: Downloading Daily Data for {len(ordered_tickers)} stocks...")
start = "1976-01-01"
end = "2026-03-21"
data = yf.download(ordered_tickers, start=start, end=end, auto_adjust=True)
if isinstance(data.columns, pd.MultiIndex):
    close_data = data['Close']
else:
    close_data = data

# Keep only tickers that actually returned price data, in market-cap order.
# (Consistent with the other country scripts; also makes the count baked
# into the file name match the rows actually exported.)
valid_tickers = [t for t in ordered_tickers if t in close_data.columns]
close_data = close_data[valid_tickers]

# --- PHASE 3: EXPORT --- (tickers as rows, dates as columns)
df_wide = close_data.transpose()
df_wide.columns = pd.to_datetime(df_wide.columns).strftime('%Y-%m-%d')
file_name = f"China_MCHI_Sorted_{len(valid_tickers)}.csv"
df_wide.to_csv(file_name)
print("-" * 30)
print(f"Done! Exported {len(valid_tickers)} stocks to {file_name}")
import pandas as pd
import yfinance as yf
import ssl
# NOTE(review): numpy (np) is not referenced anywhere in the Japan section
# below — presumably a leftover; confirm before removing.
import numpy as np
# --- BYPASS SSL CERTIFICATE ISSUES ---
# WARNING(review): disables TLS certificate verification process-wide;
# insecure in general, acceptable only for this local read-only data pull.
ssl._create_default_https_context = ssl._create_unverified_context
def get_japan_tickers_full():
    """
    Return the hand-maintained MSCI Japan constituent list (180 tickers),
    each carrying the .T suffix for the Tokyo Stock Exchange.
    """
    tickers_str = (
        "7203.T, 8306.T, 6501.T, 6758.T, 8316.T, 6857.T, 8035.T, 8411.T, 8058.T, 7011.T, "
        "6861.T, 9984.T, 8031.T, 8001.T, 4063.T, 9983.T, 8766.T, 4502.T, 7974.T, 6098.T, "
        "6902.T, 4503.T, 6702.T, 6301.T, 7267.T, 6503.T, 8053.T, 4519.T, 6981.T, 6367.T, "
        "9432.T, 9433.T, 2914.T, 6146.T, 6273.T, 5108.T, 8801.T, 8802.T, 6723.T, 7741.T, "
        "4901.T, 4661.T, 6954.T, 1925.T, 1928.T, 9101.T, 9104.T, 9107.T, 8002.T, 8015.T, "
        "7733.T, 4568.T, 9020.T, 9021.T, 9022.T, 4689.T, 7261.T, 7201.T, 3407.T, 3402.T, "
        "5401.T, 5411.T, 6504.T, 6506.T, 6762.T, 6869.T, 7751.T, 7752.T, 8037.T, 8267.T, "
        "8308.T, 8309.T, 8601.T, 8604.T, 8630.T, 8725.T, 8750.T, 9201.T, 9202.T, 9501.T, "
        "9502.T, 9503.T, 9613.T, 9735.T, 9843.T, 1605.T, 1801.T, 1802.T, 1803.T, 1812.T, "
        "2267.T, 2502.T, 2503.T, 2801.T, 2802.T, 3088.T, 3092.T, 3289.T, 3382.T, 4021.T, "
        "4183.T, 4188.T, 4324.T, 4452.T, 4507.T, 4523.T, 4543.T, 4578.T, 4612.T, 4704.T, "
        "4732.T, 4768.T, 4911.T, 5020.T, 5201.T, 5332.T, 5713.T, 5802.T, 5901.T, 6113.T, "
        "6326.T, 6471.T, 6479.T, 6586.T, 6645.T, 6701.T, 6752.T, 6920.T, 6971.T, 7012.T, "
        "7013.T, 7269.T, 7270.T, 7272.T, 7309.T, 7532.T, 7832.T, 7911.T, 7912.T, 8113.T, "
        "8227.T, 8233.T, 8252.T, 8331.T, 8354.T, 8355.T, 8410.T, 8473.T, 8591.T, 8593.T, "
        "8830.T, 9001.T, 9005.T, 9007.T, 9008.T, 9009.T, 9041.T, 9042.T, 9064.T, 9301.T, "
        "9508.T, 9531.T, 9532.T, 9602.T, 9684.T, 9697.T, 9766.T, 9989.T, 2269.T, 2413.T, "
        "3436.T, 3659.T, 4151.T, 4528.T, 4921.T, 6724.T, 6976.T, 7182.T, 7186.T, 9434.T"
    )
    # Treat commas as whitespace and split: yields the clean symbols in
    # their original order, identical to stripping each comma-field.
    return tickers_str.replace(",", " ").split()
# --- PHASE 1: GET TICKERS & VALIDATE MARKET CAPS ---
tickers = get_japan_tickers_full()
# Report the real list length instead of the old hard-coded "181"
# (the manual list actually holds 180 symbols).
print(f"Phase 1: Validating {len(tickers)} Japan Tickers and fetching Market Caps...")

cap_list = []
total = len(tickers)  # long list, so progress is printed every 20 tickers
for i, t in enumerate(tickers, 1):
    try:
        stock = yf.Ticker(t)
        info = stock.info  # fetch once per ticker, like the other country scripts
        m_cap = info.get('marketCap', 0)
        currency = info.get('currency', 'JPY')
        cap_list.append({'Ticker': t, 'MarketCap': m_cap, 'Currency': currency})
        if i % 20 == 0:
            print(f"Progress: {i}/{total} verified...")
    except Exception:
        # Keep the ticker with a zero cap so the row count stays complete;
        # zero-cap rows are dropped from the ordered list below.
        cap_list.append({'Ticker': t, 'MarketCap': 0, 'Currency': 'N/A'})

# Sort by Market Cap and keep only tickers with a positive cap.
sorted_df = pd.DataFrame(cap_list)
if not sorted_df.empty and 'MarketCap' in sorted_df.columns:
    sorted_df = sorted_df.sort_values(by='MarketCap', ascending=False)
    ordered_tickers = sorted_df[sorted_df['MarketCap'] > 0]['Ticker'].tolist()
else:
    print("No valid MarketCap data found.")
    exit()

# --- PHASE 2: DOWNLOAD HISTORICAL DATA ---
print(f"\nPhase 2: Downloading Daily Data for {len(ordered_tickers)} stocks...")
start = "1976-01-01"
end = "2026-03-21"
# Bulk download historical prices
data = yf.download(ordered_tickers, start=start, end=end, auto_adjust=True)
if isinstance(data.columns, pd.MultiIndex):
    close_data = data['Close']
else:
    close_data = data

# Filter only tickers that actually downloaded data and reorder by Market Cap
valid_tickers = [t for t in ordered_tickers if t in close_data.columns]
close_data = close_data[valid_tickers]

# --- PHASE 3: EXPORT --- (tickers as rows, dates as columns)
df_wide = close_data.transpose()
df_wide.columns = pd.to_datetime(df_wide.columns).strftime('%Y-%m-%d')
file_name = f"Japan_MSCI_MarketCap_Sorted_{len(valid_tickers)}.csv"
df_wide.to_csv(file_name)
print("-" * 30)
print(f"Done! Exported {len(valid_tickers)} stocks to {file_name}")