"""
Extract ServerData from Microsoft login page HTML to get the post URL and tokens.
Then use requests for the actual form submissions.
"""
import json
import re
from urllib.parse import urljoin

import requests

# One shared session so cookies set by each response are sent on later requests.
session = requests.Session()
# Present a desktop Chrome User-Agent on every request made through the session.
session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'

# Authorize endpoint; the query string asks for response_type=token with the
# client_id and scope shown below.
auth_url = "https://login.live.com/oauth20_authorize.srf?client_id=00000000402b5328&scope=service::user.auth.xboxlive.com::MBI_SSL&response_type=token&nocode=1"

# Step 1: GET page
print("Step 1: GET authorize page...")
# requests applies no timeout by default, so a stalled connection would hang
# the script forever; bound the wait to 30 seconds.
resp = session.get(auth_url, timeout=30)
print(f"  Status: {resp.status_code}")

# Seconds to wait on each HTTP request before giving up (requests has no
# default timeout and would otherwise block indefinitely).
REQUEST_TIMEOUT = 30


def _extract_server_data(html):
    """Return the ``ServerData`` object embedded in *html*, or ``None``.

    Two spellings are tried in order: ``var ServerData = {`` and
    ``ServerData: {``. Once the opening brace is located, the JSON decoder
    consumes exactly one complete object starting there. This avoids two
    failure modes of capturing with a lazy regex and counting braces by hand:
    the capture being cut at the first ``};`` inside a nested object, and
    braces inside string values being miscounted.

    Returns ``None`` when neither spelling is present or the text after the
    brace is not a valid JSON object.
    """
    decoder = json.JSONDecoder()
    for pattern in (r'var ServerData\s*=\s*\{', r'ServerData\s*:\s*\{'):
        found = re.search(pattern, html)
        if found is None:
            continue
        try:
            # found.end() - 1 is the index of the opening brace.
            parsed, _ = decoder.raw_decode(html[found.end() - 1:])
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return None


def _extract_ppft(server_data, default=None):
    """Return the ``value`` attribute found in ``sFTTag``, else *default*."""
    ft_tag = server_data.get('sFTTag') or ''
    found = re.search(r'value="([^"]*)"', ft_tag)
    return found.group(1) if found else default


def _report_password_page(page_html, fallback_url, fallback_ppft):
    """Print what can be recovered from a page that contains a password field.

    Reads the page's own ServerData (falling back to *fallback_url* and
    *fallback_ppft* for keys it lacks), then counts the hidden and password
    ``<input>`` elements present in the static HTML. Prints nothing if the
    page carries no parseable ServerData.
    """
    page_data = _extract_server_data(page_html)
    if page_data is None:
        return

    post_url3 = page_data.get('urlPost', fallback_url)
    ppft3 = _extract_ppft(page_data, default=fallback_ppft)

    print(f"  New urlPost: {post_url3[:80]}...")
    print(f"  New PPFT: {ppft3[:30] if ppft3 else 'N/A'}...")

    # Imported here so the dependency is only required when this branch runs.
    from bs4 import BeautifulSoup
    soup3 = BeautifulSoup(page_html, 'html.parser')

    # Look for hidden inputs
    hidden_inputs = soup3.find_all('input', {'type': 'hidden'})
    print(f"  Found {len(hidden_inputs)} hidden inputs in password page")

    # Also check for password input
    pw_inputs = soup3.find_all('input', {'type': 'password'})
    print(f"  Found {len(pw_inputs)} password inputs")

    # If no hidden inputs found (JS rendered), we need Playwright for this step
    if not hidden_inputs:
        print("  → Hidden fields are JS-rendered, need Playwright to extract them")
        print("  → Strategy: Use Playwright ONLY to navigate to password page and extract fields")
        print("  → Then use requests for password submission (faster for bulk)")


# Extract ServerData from the HTML
server_data = _extract_server_data(resp.text)

if server_data is not None:
    post_url = server_data.get('urlPost') or ''
    ppft = _extract_ppft(server_data)

    print(f"  urlPost: {(post_url or 'N/A')[:120]}...")
    print(f"  sFTName: {server_data.get('sFTName', 'N/A')}")
    print(f"  iDefaultLoginOptions: {server_data.get('iDefaultLoginOptions', 'N/A')}")
    print(f"  PPFT: {ppft[:30] if ppft else 'N/A'}...")

    if not post_url:
        # Posting to an empty URL would raise inside requests; report instead.
        print("  ServerData has no urlPost; cannot submit the login form")
    else:
        # Step 2: POST email
        print(f"\nStep 2: POST email to {post_url[:80]}...")
        data = {
            'loginfmt': 'testuser@gmail.com',
            'PPFT': ppft or '',
            'LoginOptions': '1',
        }

        resp2 = session.post(
            post_url, data=data, allow_redirects=False, timeout=REQUEST_TIMEOUT
        )
        print(f"  Status: {resp2.status_code}")
        loc = resp2.headers.get('Location', '')
        print(f"  Location: {loc[:150]}")

        if resp2.status_code in (302, 303) and loc:
            # Location may be relative; resolve it against the URL that was
            # posted to before following it manually.
            resp3 = session.get(
                urljoin(resp2.url, loc),
                allow_redirects=True,
                timeout=REQUEST_TIMEOUT,
            )
            print(f"  After redirect: {resp3.url[:120]}")

            # Check response for password page indicators
            if 'passwd' in resp3.text or 'type="password"' in resp3.text:
                print("  ✓ Response contains password field!")
                _report_password_page(resp3.text, post_url, ppft)
            elif 'proofConfirmation' in resp3.text[:500]:
                print("  → Still on proof confirmation page")
            else:
                print(f"  → Response snippet: {resp3.text[:300]}")
else:
    print("  Could not extract ServerData from page")
    # Check if JS is required
    if 'javascript' in resp.text.lower():
        print("  Page requires JavaScript")
    # Check if JS is required
    if 'javascript' in resp.text.lower():
        print("  Page requires JavaScript")

# Dump every cookie the session has collected, showing at most the first
# 40 characters of each value.
print("\n=== COOKIES ===")
for cookie in session.cookies:
    print(f"  {cookie.name}: {cookie.value[:40]}...")
