Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Grammar/python.gram
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ statement_newline[asdl_seq*]:
| a=compound_stmt NEWLINE { singleton_seq(p, a) }
| simple_stmt
| NEWLINE { singleton_seq(p, CHECK(_Py_Pass(EXTRA))) }
| ENDMARKER { interactive_exit(p) }
simple_stmt[asdl_seq*]:
| a=small_stmt !';' NEWLINE { singleton_seq(p, a) } # Not needed, there for speedup
| a=';'.small_stmt+ [';'] NEWLINE { a }
Expand Down
8 changes: 8 additions & 0 deletions Include/pegen_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,16 @@ extern "C" {

PyAPI_FUNC(mod_ty) PyPegen_ASTFromFile(const char *filename, int mode, PyArena *arena);
PyAPI_FUNC(mod_ty) PyPegen_ASTFromString(const char *str, int mode, PyArena *arena);
PyAPI_FUNC(mod_ty) PyPegen_ASTFromFileObject(FILE *fp, PyObject *filename_ob,
int mode, const char *enc, const char *ps1,
const char *ps2, int *errcode, PyArena *arena);
PyAPI_FUNC(PyCodeObject *) PyPegen_CodeObjectFromFile(const char *filename, int mode);
PyAPI_FUNC(PyCodeObject *) PyPegen_CodeObjectFromString(const char *str, int mode);
/* Parse the stream `fp` and compile the resulting AST into a code object.
 * The string arguments are taken in the order enc, ps1, ps2, i.e. the same
 * order PyPegen_ASTFromFileObject uses. `errcode` is forwarded to the parser. */
PyAPI_FUNC(PyCodeObject *) PyPegen_CodeObjectFromFileObject(FILE *fp, PyObject *filename_ob,
                                                            int mode, const char *enc,
                                                            const char *ps1,
                                                            const char *ps2,
                                                            int *errcode);

#ifdef __cplusplus
}
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_peg_generator/test_c_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ def test_syntax_error_for_string(self) -> None:
except SyntaxError as e:
tb = traceback.format_exc()
self.assertTrue('File "<string>", line 1' in tb)
self.assertTrue(f"{text}\n ^" in tb)
self.assertTrue(f"SyntaxError: invalid syntax" in tb)

def test_headers_and_trailer(self) -> None:
grammar_source = """
Expand Down
16 changes: 15 additions & 1 deletion Parser/pegen/parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@ statement_rule(Parser *p)
return res;
}

// statement_newline: compound_stmt NEWLINE | simple_stmt | NEWLINE
// statement_newline: compound_stmt NEWLINE | simple_stmt | NEWLINE | $
static asdl_seq*
statement_newline_rule(Parser *p)
{
Expand Down Expand Up @@ -816,6 +816,20 @@ statement_newline_rule(Parser *p)
}
p->mark = mark;
}
{ // $
void *endmarker_var;
if (
(endmarker_var = endmarker_token(p))
)
{
res = interactive_exit ( p );
if (res == NULL && PyErr_Occurred()) {
longjmp(p->error_env, 1);
}
goto done;
}
p->mark = mark;
}
res = NULL;
done:
return res;
Expand Down
2 changes: 1 addition & 1 deletion Parser/pegen/parse_string.c
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
return NULL;
}

Parser *p2 = Parser_New(tok, Py_fstring_input, STRING_INPUT, p->arena);
Parser *p2 = Parser_New(tok, Py_fstring_input, NULL, p->arena);

expr = parse(p2);

Expand Down
34 changes: 34 additions & 0 deletions Parser/pegen/peg_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ PyPegen_ASTFromFile(const char *filename, int mode, PyArena *arena)
return result;
}

/* Parse the stream `fp` into an AST.
 *
 * Thin public wrapper: every argument is forwarded unchanged to
 * run_parser_from_file_pointer(), with `mode` passed as its start rule.
 * Returns whatever that function returns. */
mod_ty
PyPegen_ASTFromFileObject(FILE *fp, PyObject *filename_ob, int mode,
                          const char *enc, const char *ps1, const char* ps2,
                          int *errcode, PyArena *arena)
{
    mod_ty module = run_parser_from_file_pointer(fp, mode, filename_ob,
                                                 enc, ps1, ps2,
                                                 errcode, arena);
    return module;
}

PyCodeObject *
PyPegen_CodeObjectFromString(const char *str, int mode)
{
Expand Down Expand Up @@ -84,3 +93,28 @@ PyPegen_CodeObjectFromFile(const char *filename, int mode)
PyArena_Free(arena);
return result;
}

/* Parse the stream `fp` and compile the resulting AST into a code object.
 *
 * The string parameters are ordered (enc, ps1, ps2) so that the definition
 * agrees with the prototype in Include/pegen_interface.h; since all three
 * are `const char *`, a mismatched order would compile silently and swap
 * the encoding with the prompts.
 *
 * A private arena is created for the parse/compile and is always released
 * before returning.
 *
 * Returns NULL when the arena cannot be created, when
 * PyPegen_ASTFromFileObject() returns NULL, or when PyAST_CompileObject()
 * returns NULL. */
PyCodeObject *
PyPegen_CodeObjectFromFileObject(FILE *fp, PyObject *filename_ob, int mode,
                                 const char *enc, const char *ps1,
                                 const char *ps2, int *errcode)
{
    PyArena *arena = PyArena_New();
    if (arena == NULL) {
        return NULL;
    }

    PyCodeObject *result = NULL;

    mod_ty res = PyPegen_ASTFromFileObject(fp, filename_ob, mode, enc, ps1, ps2,
                                           errcode, arena);
    if (res == NULL) {
        goto error;
    }

    result = PyAST_CompileObject(res, filename_ob, NULL, -1, arena);

error:
    // Reached on success as well: the arena is only needed during compilation.
    PyArena_Free(arena);
    return result;
}
92 changes: 56 additions & 36 deletions Parser/pegen/pegen.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <Python.h>
#include <errcode.h>
#include "../tokenizer.h"

#include "pegen.h"
Expand Down Expand Up @@ -57,6 +58,7 @@ raise_syntax_error(Parser *p, const char *errmsg, ...)
PyObject *loc = NULL;
PyObject *tmp = NULL;
Token *t = p->tokens[p->fill - 1];
Py_ssize_t col_number = 0;
va_list va;

va_start(va, errmsg);
Expand All @@ -65,25 +67,24 @@ raise_syntax_error(Parser *p, const char *errmsg, ...)
if (!errstr) {
goto error;
}
if (p->input_mode == FILE_INPUT) {

if (p->start_rule == Py_file_input) {
loc = PyErr_ProgramTextObject(p->tok->filename, t->lineno);
if (!loc) {
Py_INCREF(Py_None);
loc = Py_None;
}
}
else {
assert(p->input_mode == STRING_INPUT);
else if (p->start_rule == Py_fstring_input || p->start_rule == Py_eval_input) {
loc = get_error_line(p->tok->buf);
if (!loc) {
goto error;
}
}
// We may receive tokens with the col_offset not initialized (-1) since
// emitted by fill_token(). For instance, this can happen in some error
// situations involving invalid indentation.
int col_offset = t->col_offset == -1 ? 0 : t->col_offset;
Py_ssize_t col_number = byte_offset_to_character_offset(loc, col_offset) + 1;

if (loc) {
int col_offset = t->col_offset == -1 ? 0 : t->col_offset;
col_number = byte_offset_to_character_offset(loc, col_offset) + 1;
}
else {
Py_INCREF(Py_None);
loc = Py_None;
}


tmp = Py_BuildValue("(OiiN)", p->tok->filename, t->lineno, col_number, loc);
if (!tmp) {
goto error;
Expand All @@ -94,6 +95,7 @@ raise_syntax_error(Parser *p, const char *errmsg, ...)
goto error;
}
PyErr_SetObject(PyExc_SyntaxError, value);

Py_DECREF(errstr);
Py_DECREF(value);
return 0;
Expand Down Expand Up @@ -546,8 +548,7 @@ Parser_Free(Parser *p)
}

Parser *
Parser_New(struct tok_state *tok, int start_rule, int input_mode,
PyArena *arena)
Parser_New(struct tok_state *tok, int start_rule, int *errcode, PyArena *arena)
{
Parser *p = PyMem_Malloc(sizeof(Parser));
if (p == NULL) {
Expand All @@ -556,7 +557,6 @@ Parser_New(struct tok_state *tok, int start_rule, int input_mode,
}
assert(tok != NULL);
p->tok = tok;
p->input_mode = input_mode;
p->keywords = NULL;
p->n_keyword_lists = -1;
p->tokens = PyMem_Malloc(sizeof(Token *));
Expand All @@ -570,6 +570,7 @@ Parser_New(struct tok_state *tok, int start_rule, int input_mode,
p->fill = 0;
p->size = 1;

p->errcode = errcode;
p->arena = arena;
p->start_rule = start_rule;

Expand Down Expand Up @@ -602,37 +603,47 @@ run_parser(Parser *p)
}

mod_ty
run_parser_from_file(const char *filename, int start_rule,
PyObject *filename_ob, PyArena *arena)
run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
const char *enc, const char *ps1, const char *ps2,
int *errcode, PyArena *arena)
{
FILE *fp = fopen(filename, "rb");
if (fp == NULL) {
PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
return NULL;
}

// From here on we need to clean up even if there's an error
mod_ty result = NULL;

struct tok_state *tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2);
if (tok == NULL) {
goto error;
return NULL;
}
// This transfers the ownership to the tokenizer
tok->filename = filename_ob;
Py_INCREF(filename_ob);

Parser *p = Parser_New(tok, start_rule, FILE_INPUT, arena);
// From here on we need to clean up even if there's an error
mod_ty result = NULL;

Parser *p = Parser_New(tok, start_rule, errcode, arena);
if (p == NULL) {
goto after_tok_error;
goto error;
}

result = run_parser(p);
Parser_Free(p);

after_tok_error:
PyTokenizer_Free(tok);
error:
PyTokenizer_Free(tok);
return result;
}

/* Open `filename` in binary read mode and parse it from `start_rule`.
 *
 * If the file cannot be opened, an OSError is set from errno (with the
 * filename attached) and NULL is returned. Otherwise the work is delegated
 * to run_parser_from_file_pointer() with NULL for the encoding, both
 * prompts and the errcode pointer, and the file is closed afterwards. */
mod_ty
run_parser_from_file(const char *filename, int start_rule,
                     PyObject *filename_ob, PyArena *arena)
{
    mod_ty module = NULL;
    FILE *stream = fopen(filename, "rb");

    if (stream != NULL) {
        module = run_parser_from_file_pointer(stream, start_rule, filename_ob,
                                              NULL, NULL, NULL, NULL, arena);
        // The stream was opened here, so it is closed here.
        fclose(stream);
    }
    else {
        PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
    }

    return module;
}
Expand All @@ -652,7 +663,7 @@ run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
// We need to clear up from here on
mod_ty result = NULL;

Parser *p = Parser_New(tok, start_rule, STRING_INPUT, arena);
Parser *p = Parser_New(tok, start_rule, NULL, arena);
if (p == NULL) {
goto error;
}
Expand All @@ -665,6 +676,15 @@ run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
return result;
}

/* Grammar action used by the ENDMARKER alternative of statement_newline.
 *
 * When the parser was created with a non-NULL errcode pointer, E_EOF is
 * stored through it. The function always returns NULL. */
void *
interactive_exit(Parser *p)
{
    int *errcode_out = p->errcode;

    if (errcode_out != NULL) {
        *errcode_out = E_EOF;
    }
    return NULL;
}

/* Creates a single-element asdl_seq* that contains a */
asdl_seq *
singleton_seq(Parser *p, void *a)
Expand Down
13 changes: 5 additions & 8 deletions Parser/pegen/pegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,6 @@
#include <pyarena.h>
#include <setjmp.h>

enum INPUT_MODE {
FILE_INPUT,
STRING_INPUT,
};
typedef enum INPUT_MODE INPUT_MODE;

typedef struct _memo {
int type;
void *node;
Expand Down Expand Up @@ -42,7 +36,7 @@ typedef struct {
KeywordToken **keywords;
int n_keyword_lists;
int start_rule;
INPUT_MODE input_mode;
int *errcode;
jmp_buf error_env;
} Parser;

Expand Down Expand Up @@ -136,10 +130,13 @@ CHECK_CALL_NULL_ALLOWED(Parser *p, void *result)
#define CHECK_NULL_ALLOWED(result) CHECK_CALL_NULL_ALLOWED(p, result)

PyObject *new_identifier(Parser *, char *);
Parser *Parser_New(struct tok_state *, int, int, PyArena *);
Parser *Parser_New(struct tok_state *, int, int *, PyArena *);
void Parser_Free(Parser *);
mod_ty run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
const char *, const char *, int *, PyArena *);
mod_ty run_parser_from_file(const char *, int, PyObject *, PyArena *);
mod_ty run_parser_from_string(const char *, int, PyObject *, PyArena *);
void *interactive_exit(Parser *);
asdl_seq *singleton_seq(Parser *, void *);
asdl_seq *seq_insert_in_front(Parser *, void *, asdl_seq *);
asdl_seq *seq_flatten(Parser *, asdl_seq *);
Expand Down
15 changes: 12 additions & 3 deletions Python/pythonrun.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ PyRun_InteractiveOneObjectEx(FILE *fp, PyObject *filename,
PyArena *arena;
const char *ps1 = "", *ps2 = "", *enc = NULL;
int errcode = 0;
int use_peg = _PyInterpreterState_GET_UNSAFE()->config.use_peg;
_Py_IDENTIFIER(encoding);
_Py_IDENTIFIER(__main__);

Expand Down Expand Up @@ -240,9 +241,17 @@ PyRun_InteractiveOneObjectEx(FILE *fp, PyObject *filename,
Py_XDECREF(oenc);
return -1;
}
mod = PyParser_ASTFromFileObject(fp, filename, enc,
Py_single_input, ps1, ps2,
flags, &errcode, arena);

if (use_peg) {
mod = PyPegen_ASTFromFileObject(fp, filename, Py_single_input,
enc, ps1, ps2, &errcode, arena);
}
else {
mod = PyParser_ASTFromFileObject(fp, filename, enc,
Py_single_input, ps1, ps2,
flags, &errcode, arena);
}

Py_XDECREF(v);
Py_XDECREF(w);
Py_XDECREF(oenc);
Expand Down