@@ -2218,19 +2218,48 @@ def read_stata(
22182218 ... # Operate on a single chunk, e.g., chunk.mean()
22192219 ... pass # doctest: +SKIP
22202220 """
2221- reader = StataReader (
2222- filepath_or_buffer ,
2223- convert_dates = convert_dates ,
2224- convert_categoricals = convert_categoricals ,
2225- index_col = index_col ,
2226- convert_missing = convert_missing ,
2227- preserve_dtypes = preserve_dtypes ,
2228- columns = columns ,
2229- order_categoricals = order_categoricals ,
2230- chunksize = chunksize ,
2231- storage_options = storage_options ,
2232- compression = compression ,
2233- )
2221+ try :
2222+ reader = StataReader (
2223+ filepath_or_buffer ,
2224+ convert_dates = convert_dates ,
2225+ convert_categoricals = convert_categoricals ,
2226+ index_col = index_col ,
2227+ convert_missing = convert_missing ,
2228+ preserve_dtypes = preserve_dtypes ,
2229+ columns = columns ,
2230+ order_categoricals = order_categoricals ,
2231+ chunksize = chunksize ,
2232+ storage_options = storage_options ,
2233+ compression = compression ,
2234+ )
2235+ except ValueError as e :
2236+ # If users pass HTML/JSON/etc. (e.g., a GitHub page URL), StataReader
2237+ # often raises a version/format ValueError. Replace with a clearer message.
2238+ msg = str (e )
2239+ if (
2240+ "Version of given Stata file is" in msg
2241+ or "not a Stata dataset" in msg
2242+ or "not a valid Stata" in msg
2243+ ):
2244+ base = (
2245+ "This is not a valid Stata dataset. This may be because it is not a "
2246+ "valid Stata dataset, or a Stata dataset from a version of Stata that "
2247+ "pandas cannot import. pandas supports importing versions 105, 108, "
2248+ "111 (Stata 7SE), 113 (Stata 8/9), 114 (Stata 10/11), 115 (Stata 12), "
2249+ "117 (Stata 13), 118 (Stata 14/15/16), and 119 (Stata 15/16, over 32, "
2250+ "767 variables)."
2251+ )
2252+ hint = ""
2253+ if isinstance (filepath_or_buffer , (str , os .PathLike )):
2254+ s = os .fspath (filepath_or_buffer )
2255+ if "github.com" in s and ("/blob/" in s or "/tree/" in s ):
2256+ hint = (
2257+ " If you're loading from GitHub, use the Raw file URL "
2258+ "(replace '/blob/' with '/raw/' or click the 'Raw' button)."
2259+ )
2260+ raise ValueError (base + hint ) from e
2261+ # Different error: keep original
2262+ raise
22342263
22352264 if iterator or chunksize :
22362265 return reader
0 commit comments