# -*- coding: utf-8 -*-
import os
import re
import csv
from urllib.parse import urlparse, parse_qs
from googleapiclient.discovery import build
from .gclass_auth import _get_google_classroom_credentials
def download_google_sheet_to_csv(
    sheet_url,
    output_path=None,
    credentials_path='gclassroom_credentials.json',
    token_path='token.pickle',
    verbose=False,
    sheet_name=None,
    sheet_selection='first',
):
    """
    Download a Google Sheet (one tab, or several tabs merged) to a CSV file.

    Tab-selection precedence, highest first:
      1. ``sheet_name`` — exact tab title; ValueError if not found.
      2. ``sheet_selection='select'`` — interactive prompt on stdin.
      3. ``sheet_selection`` in ('all', 'merge') — every tab, rows concatenated
         (each tab's header row is kept as an ordinary data row).
      4. A ``gid`` in the URL fragment or query — the tab with that sheetId.
      5. Otherwise the document's first tab.

    Args:
        sheet_url: Full Google Sheets URL (must contain /spreadsheets/d/<id>).
        output_path: Destination CSV path; defaults to ./google_sheet.csv.
        credentials_path: OAuth client-secrets JSON file used to authenticate.
        token_path: Cached OAuth token pickle reused between runs.
        verbose: Print progress messages when True.
        sheet_name: Specific tab title to download.
        sheet_selection: 'first' (default), 'select' (interactive), 'all', or 'merge'.

    Returns:
        The path of the CSV file that was written.

    Raises:
        ValueError: Missing/unparsable URL, unknown ``sheet_name``, no tab
            metadata retrievable, or no data in the selected tab(s).
    """
    if not sheet_url:
        raise ValueError("Google Sheet URL is required.")

    def _extract_sheet_id(value):
        # The spreadsheet ID is the path segment after /spreadsheets/d/.
        match = re.search(r"/spreadsheets/d/([a-zA-Z0-9-_]+)", value or "")
        return match.group(1) if match else None

    def _extract_gid(value):
        # A tab id ("gid") may appear in the URL fragment (#gid=0), in a
        # query-string-style fragment, or in the query string itself.
        parsed = urlparse(value)
        if parsed.fragment:
            frag_qs = parse_qs(parsed.fragment)
            if "gid" in frag_qs:
                return frag_qs["gid"][0]
            if parsed.fragment.startswith("gid="):
                return parsed.fragment.split("gid=", 1)[-1]
        query = parse_qs(parsed.query)
        if "gid" in query:
            return query["gid"][0]
        return None

    sheet_id = _extract_sheet_id(sheet_url)
    if not sheet_id:
        raise ValueError("Could not parse spreadsheet ID from URL.")
    gid = _extract_gid(sheet_url)
    creds = _get_google_classroom_credentials(credentials_path, token_path, verbose=verbose)
    service = build("sheets", "v4", credentials=creds)
    # Get all sheets for selection
    all_sheets = list_google_sheets(sheet_url, credentials_path, token_path, verbose=False)
    if not all_sheets:
        raise ValueError("Could not retrieve sheet information from Google Sheets.")
    # Determine which sheet(s) to download
    selected_sheet_titles = []
    if sheet_name:
        # Specific sheet requested — exact title match only.
        for s in all_sheets:
            if s['title'] == sheet_name:
                selected_sheet_titles = [sheet_name]
                break
        if not selected_sheet_titles:
            available = ', '.join([s['title'] for s in all_sheets])
            raise ValueError(f"Sheet '{sheet_name}' not found. Available sheets: {available}")
    elif sheet_selection == 'select':
        # Interactive selection
        print(f"\nAvailable sheets in Google Sheets document:")
        for i, s in enumerate(all_sheets, 1):
            print(f" {i}. {s['title']}")
        choice = input(f"\nSelect sheet number(s) (1-{len(all_sheets)}, comma-separated for multiple, or 'all'): ").strip().lower()
        if choice == 'all':
            selected_sheet_titles = [s['title'] for s in all_sheets]
            print(f"[GSheets] Selected all {len(all_sheets)} sheets for merging")
        else:
            try:
                # User input is 1-based; convert to 0-based indices.
                # Out-of-range entries are warned about and skipped.
                sheet_indices = [int(x.strip()) - 1 for x in choice.split(',')]
                for idx in sheet_indices:
                    if 0 <= idx < len(all_sheets):
                        selected_sheet_titles.append(all_sheets[idx]['title'])
                    else:
                        print(f"Warning: Sheet number {idx + 1} out of range, skipping")
                if not selected_sheet_titles:
                    print("No valid sheets selected. Using first sheet.")
                    selected_sheet_titles = [all_sheets[0]['title']]
            except ValueError:
                # Non-numeric input falls back to the first tab.
                print("Invalid input. Using first sheet.")
                selected_sheet_titles = [all_sheets[0]['title']]
    elif sheet_selection in ['all', 'merge']:
        # Import all sheets
        selected_sheet_titles = [s['title'] for s in all_sheets]
        if verbose:
            print(f"[GSheets] Importing all {len(all_sheets)} sheets for merging")
    elif gid:
        # GID specified in URL - try to find matching sheet
        try:
            gid_int = int(gid)
            for s in all_sheets:
                if s['sheetId'] == gid_int:
                    selected_sheet_titles = [s['title']]
                    break
        except ValueError:
            # Non-numeric gid: ignore it; the fallback below picks the first tab.
            pass
        if not selected_sheet_titles:
            selected_sheet_titles = [all_sheets[0]['title']]
    else:
        # Default: first sheet
        selected_sheet_titles = [all_sheets[0]['title']]
    if verbose or len(selected_sheet_titles) > 1:
        print(f"[GSheets] Selected sheet(s): {', '.join(selected_sheet_titles)}")
    # Download and merge sheets
    all_values = []
    for sheet_title in selected_sheet_titles:
        if len(selected_sheet_titles) > 1 and verbose:
            print(f"[GSheets] Downloading sheet: {sheet_title}")
        # Quote the sheet name to prevent interpretation as cell reference
        # (e.g., "DN2022" would be interpreted as column DN, row 2022 without quotes)
        quoted_sheet = f"'{sheet_title}'"
        values = service.spreadsheets().values().get(
            spreadsheetId=sheet_id,
            range=quoted_sheet,
        ).execute().get("values", []) or []
        if values:
            all_values.append(values)
    if not all_values:
        raise ValueError("No data found in selected Google Sheet(s).")
    # Merge all sheets
    if len(all_values) > 1:
        if verbose:
            print(f"[GSheets] Merging {len(all_values)} sheets...")
        # Concatenate all rows from all sheets
        merged_values = []
        for values in all_values:
            merged_values.extend(values)
        values = merged_values
    else:
        values = all_values[0]
    # The Sheets API omits trailing empty cells, so rows can be ragged;
    # pad every row to the widest row so the CSV is rectangular.
    max_cols = max(len(row) for row in values)
    normalized_rows = [row + [""] * (max_cols - len(row)) for row in values]
    if not output_path:
        output_path = os.path.join(os.getcwd(), "google_sheet.csv")
    os.makedirs(os.path.dirname(os.path.abspath(output_path)) or ".", exist_ok=True)
    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerows(normalized_rows)
    if verbose:
        print(f"[GSheets] Saved CSV to {output_path}")
    return output_path
def list_google_sheets(sheet_url, credentials_path='gclassroom_credentials.json', token_path='token.pickle', verbose=False):
    """
    Enumerate the worksheet tabs of a Google Sheets document.

    Args:
        sheet_url: Full Google Sheets URL containing /spreadsheets/d/<id>.
        credentials_path: OAuth client-secrets JSON file used to authenticate.
        token_path: Cached OAuth token pickle reused between runs.
        verbose: Print the discovered tabs (and any error) when True.

    Returns:
        A list of dicts with 'title', 'sheetId' and 'index' keys, ordered by
        tab position. Returns an empty list if the API call fails.

    Raises:
        ValueError: If the URL is missing or no spreadsheet ID can be parsed.
    """
    if not sheet_url:
        raise ValueError("Google Sheet URL is required.")
    # The spreadsheet ID is the path segment after /spreadsheets/d/.
    id_match = re.search(r"/spreadsheets/d/([a-zA-Z0-9-_]+)", sheet_url or "")
    if id_match is None:
        raise ValueError("Could not parse spreadsheet ID from URL.")
    spreadsheet_id = id_match.group(1)
    try:
        creds = _get_google_classroom_credentials(credentials_path, token_path, verbose=verbose)
        service = build("sheets", "v4", credentials=creds)
        meta = service.spreadsheets().get(
            spreadsheetId=spreadsheet_id,
            fields="sheets(properties(sheetId,title,index))"
        ).execute()
        tabs = []
        for entry in (meta.get("sheets", []) or []):
            props = entry.get("properties", {}) or {}
            tabs.append({
                "title": props.get("title", ""),
                "sheetId": props.get("sheetId", 0),
                "index": props.get("index", 0),
            })
        # Present tabs in their on-screen order.
        tabs.sort(key=lambda tab: tab["index"])
        if verbose:
            print(f"[GSheets] Found {len(tabs)} sheets:")
            for tab in tabs:
                print(f" - {tab['title']} (ID: {tab['sheetId']})")
        return tabs
    except Exception as exc:
        # Best-effort: any auth/API failure degrades to an empty listing.
        if verbose:
            print(f"[GSheets] Error listing sheets: {exc}")
        return []
def get_google_file_metadata(file_id, credentials_path='gclassroom_credentials.json', token_path='token.pickle', verbose=False):
    """
    Fetch basic Google Drive metadata for a single file.

    Args:
        file_id: Drive file ID; a falsy value short-circuits to None.
        credentials_path: OAuth client-secrets JSON file used to authenticate.
        token_path: Cached OAuth token pickle reused between runs.
        verbose: Print diagnostic messages when True.

    Returns:
        The Drive API response dict (fields: 'id', 'name', 'modifiedTime',
        'webViewLink', 'owners'), or None when the ID is missing, credentials
        are unavailable, or the request fails.
    """
    if not file_id:
        return None
    try:
        creds = _get_google_classroom_credentials(credentials_path, token_path, verbose=verbose)
        if not creds:
            if verbose:
                print("[GDrive] No valid credentials found.")
            return None
        drive = build("drive", "v3", credentials=creds)
        request = drive.files().get(
            fileId=file_id,
            fields="id, name, modifiedTime, webViewLink, owners"
        )
        return request.execute()
    except Exception as exc:
        # Best-effort: any auth/API failure degrades to None.
        if verbose:
            print(f"[GDrive] Error fetching metadata for {file_id}: {exc}")
        return None