Add support for calling built-in functions (only print for the moment).

progval · progval · commit 3dfa6e7e93f7 · 2016-04-13T17:59:00.000+02:00
diff --git a/src/lib.rs b/src/lib.rs
@@ -6,6 +6,7 @@ mod stack;
 
 use std::fmt;
 use std::io;
+use processor::Processor;
 
 #[derive(Debug)]
 pub enum InterpreterError {
@@ -24,19 +25,22 @@ impl fmt::Display for InterpreterError {
     }
 }
 
-pub fn run_module<R: io::Read, EP: sandbox::EnvProxy>(reader: &mut R, envproxy: &mut EP) -> Result<objects::ObjectRef, InterpreterError> {
+pub fn run_module<R: io::Read, EP: sandbox::EnvProxy>(reader: &mut R, envproxy: EP) -> Result<(Processor<EP>, objects::ObjectRef), InterpreterError> {
     let mut buf = [0; 12];
     try!(reader.read_exact(&mut buf).map_err(InterpreterError::Io));
     // TODO: do something with the content of the buffer
     let mut store = objects::ObjectStore::new();
     let module = try!(marshal::read_object(reader, &mut store).map_err(InterpreterError::Unmarshal));
-    processor::run_code_object(envproxy, &mut store, module).map_err(InterpreterError::Processor)
+    let mut processor = Processor { envproxy: envproxy, store: store, builtin_functions: Processor::get_default_builtins() };
+    let result = try!(processor.run_code_object(module).map_err(InterpreterError::Processor));
+    Ok((processor, result))
 }
 
 #[test]
 fn test_hello_world() {
     let mut reader: &[u8] = b"\xee\x0c\r\n\x15j\nW\x15\x00\x00\x00\xe3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00@\x00\x00\x00s\x0e\x00\x00\x00e\x00\x00d\x00\x00\x83\x01\x00\x01d\x01\x00S)\x02z\x0bHello worldN)\x01\xda\x05print\xa9\x00r\x02\x00\x00\x00r\x02\x00\x00\x00\xfa\x0b/tmp/foo.py\xda\x08<module>\x01\x00\x00\x00s\x00\x00\x00\x00";
-    let mut envproxy = sandbox::MockEnvProxy::new();
-    println!("{:?}", run_module(&mut reader, &mut envproxy).unwrap());
-    assert_eq!(*envproxy.stdout_content.lock().unwrap(), b"Hello world\n");
+    let envproxy = sandbox::MockEnvProxy::new();
+    let (processor, result) = run_module(&mut reader, envproxy).unwrap();
+    println!("{:?}", result);
+    assert_eq!(*processor.envproxy.stdout_content.lock().unwrap(), b"Hello world\n");
 }
diff --git a/src/marshal/mod.rs b/src/marshal/mod.rs
@@ -2,8 +2,7 @@ pub mod common;
 pub mod decode;
 
 use std::io;
-use std::collections::HashSet;
-use super::objects::{Code, ObjectContent, Object, ObjectRef, ObjectStore};
+use super::objects::{Code, ObjectContent, ObjectRef, ObjectStore};
 use self::common::Object as MarshalObject;
 use self::common::Code as MarshalCode;
 
@@ -24,7 +23,13 @@ macro_rules! translate_code_field {
 fn translate_code(c: MarshalCode, translation_map: &Vec<ObjectRef>, store: &mut ObjectStore) -> Code {
     let code = translate_code_field!(c, Bytes, code, translation_map, store, "Code.code object must be bytes.");
     let consts = translate_code_field!(c, Tuple, consts, translation_map, store, "Code.consts object must be a tuple.");
-    let names = translate_code_field!(c, Tuple, names, translation_map, store, "Code.names object must be a tuple.");
+    let name_objs = translate_code_field!(c, Tuple, names, translation_map, store, "Code.names object must be a tuple.");
+    let names = name_objs.iter().map(|obj| {
+        match store.deref(obj).content {
+            ObjectContent::String(ref name) => name.clone(),
+            _ => panic!("At least one object in Code.names is not a string."),
+        }
+    }).collect();
     Code { code: code, consts: consts, names: names }
 }
 
diff --git a/src/objects/mod.rs b/src/objects/mod.rs
@@ -1,5 +1,4 @@
 use std::collections::HashMap;
-use std::sync::Mutex;
 use std::sync::atomic::{AtomicUsize, Ordering};
 
 #[derive(Debug)]
@@ -12,7 +11,7 @@ pub struct Code {/*
     pub flags: u32,*/
     pub code: Vec<u8>,
     pub consts: Vec<ObjectRef>,
-    pub names: Vec<ObjectRef>,/*
+    pub names: Vec<String>,/*
     pub varnames: Object,
     pub freevars: Object,
     pub cellvars: Object,
@@ -23,6 +22,7 @@ pub struct Code {/*
 }
 
 #[derive(Debug)]
+#[derive(Clone)]
 pub enum ObjectContent {
     None,
     True,
@@ -35,6 +35,7 @@ pub enum ObjectContent {
     Set(Vec<ObjectRef>),
     FrozenSet(Vec<ObjectRef>),
     Bytes(Vec<u8>),
+    BuiltinFunction(String),
 }
 
 #[derive(Debug)]
diff --git a/src/processor/instructions.rs b/src/processor/instructions.rs
@@ -1,5 +1,3 @@
-use std::str::Bytes;
-
 #[derive(PartialEq)]
 #[derive(Debug)]
 pub enum Instruction {
diff --git a/src/processor/mod.rs b/src/processor/mod.rs
@@ -1,10 +1,12 @@
 pub mod instructions;
 
-use super::objects::{Object, Code, ObjectStore, ObjectRef, ObjectContent};
+use super::objects::{Code, ObjectStore, ObjectRef, ObjectContent};
 use super::sandbox::EnvProxy;
 use super::stack::{Stack, VectorStack};
 use self::instructions::Instruction;
 use std::fmt;
+use std::collections::HashMap;
+use std::io::Write;
 
 #[derive(Debug)]
 pub enum ProcessorError {
@@ -16,6 +18,8 @@ pub enum ProcessorError {
     StackTooSmall,
     InvalidConstIndex,
     InvalidNameIndex,
+    UnknownBuiltin(String),
+    Exception(String),
 }
 
 impl fmt::Display for ProcessorError {
@@ -24,45 +28,98 @@ impl fmt::Display for ProcessorError {
     }
 }
 
-fn call_function<EP: EnvProxy>(envproxy: &mut EP, store: &mut ObjectStore, func_ref: &ObjectRef, args: Vec<ObjectRef>, kwags: Vec<ObjectRef>) -> Result<ObjectRef, ProcessorError> {
-    let code = match store.deref(func_ref).content {
-        ObjectContent::Code(ref code) => code.clone(),
-        ref o => return Err(ProcessorError::NotACodeObject(format!("{:?}", o))),
-    };
-    run_code(envproxy, store, code)
-}
-
-fn run_code<EP: EnvProxy>(envproxy: &mut EP, store: &mut ObjectStore, code: Code) -> Result<ObjectRef, ProcessorError> {
-    let bytecode: Vec<u8> = code.code;
-    let instructions: Vec<Instruction> = instructions::InstructionDecoder::new(bytecode.iter()).into_iter().collect();
-    let mut program_counter = 0 as usize;
-    let mut stack = VectorStack::new();
-    while true {
-        let instruction = try!(instructions.get(program_counter).ok_or(ProcessorError::InvalidProgramCounter));
-        program_counter += 1;
-        match *instruction {
-            Instruction::ReturnValue => return Ok(try!(stack.pop().ok_or(ProcessorError::StackTooSmall))),
-            Instruction::LoadConst(i) => stack.push(try!(code.names.get(i).ok_or(ProcessorError::InvalidConstIndex)).clone()),
-            Instruction::LoadName(i) => stack.push(try!(code.names.get(i).ok_or(ProcessorError::InvalidNameIndex)).clone()),
-            Instruction::CallFunction(nb_args, nb_kwargs) => {
-                // See “Call constructs” at:
-                // http://security.coverity.com/blog/2014/Nov/understanding-python-bytecode.html
-                let kwargs = try!(stack.pop_many(nb_kwargs*2).ok_or(ProcessorError::StackTooSmall));
-                let args = try!(stack.pop_many(nb_args).ok_or(ProcessorError::StackTooSmall));
-                let func = try!(stack.pop().ok_or(ProcessorError::StackTooSmall));
-                let ret_value = call_function(envproxy, store, &func, args, kwargs);
-                stack.push(try!(ret_value))
+fn builtin_print<EP: EnvProxy>(processor: &mut Processor<EP>, args: Vec<ObjectRef>, kwargs: HashMap<String, ObjectRef>) -> Result<ObjectRef, ProcessorError> {
+    if args.len() > 1 {
+        return Err(ProcessorError::Exception(format!("print takes exactly one argument, not {}.", args.len())))
+    }
+    else {
+        let obj_ref = args.get(0).unwrap();
+        match processor.store.deref(obj_ref).content {
+            ObjectContent::String(ref s) => {
+                processor.envproxy.stdout().write(s.clone().into_bytes().as_slice()).unwrap(); // TODO: check
+                processor.envproxy.stdout().write(b"\n").unwrap(); // TODO: check
             }
-            _ => panic!(format!("todo: instruction {:?}", *instruction)),
+            ref o => return Err(ProcessorError::Exception(format!("print takes a string, not {:?}", o))),
         }
-    };
-    panic!("Unreachable")
+    }
+    Ok(processor.store.allocate(ObjectContent::None))
 }
 
-pub fn run_code_object<EP: EnvProxy>(envproxy: &mut EP, store: &mut ObjectStore, module: ObjectRef) -> Result<ObjectRef, ProcessorError> {
-    let code = match store.deref(&module).content {
-        ObjectContent::Code(ref code) => code.clone(),
-        ref o => return Err(ProcessorError::NotACodeObject(format!("{:?}", o))),
-    };
-    run_code(envproxy, store, code)
+pub type PyFunction<EP> = fn(&mut Processor<EP>, /*args:*/ Vec<ObjectRef>, /*kwargs:*/ HashMap<String, ObjectRef>) -> Result<ObjectRef, ProcessorError>;
+
+pub struct Processor<EP: EnvProxy> {
+    pub envproxy: EP,
+    pub store: ObjectStore,
+    pub builtin_functions: HashMap<String, PyFunction<EP>>,
+}
+
+impl<EP: EnvProxy> Processor<EP> {
+    pub fn get_default_builtins() -> HashMap<String, PyFunction<EP>> {
+        let mut builtins: HashMap<String, PyFunction<EP>> = HashMap::new();
+        builtins.insert("print".to_string(), builtin_print);
+        builtins
+    }
+
+    fn load_name(&mut self, name: String) -> Result<ObjectRef, ProcessorError> {
+        Ok(self.store.allocate(ObjectContent::BuiltinFunction(name)))
+    }
+
+    fn call_function(&mut self, func_ref: &ObjectRef, args: Vec<ObjectRef>, kwargs: Vec<ObjectRef>) -> Result<ObjectRef, ProcessorError> {
+        // TODO: clone only if necessary
+        match self.store.deref(func_ref).content.clone() {
+            ObjectContent::Code(code) => {
+                self.run_code(code)
+            },
+            ObjectContent::BuiltinFunction(name) => {
+                let f = match self.builtin_functions.get(&name) {
+                    Some(f) => f.clone(),
+                    None => return Err(ProcessorError::UnknownBuiltin(name.clone())),
+                };
+                f(self, args, HashMap::new()) // TODO: use the real kwargs
+            },
+            ref o => return Err(ProcessorError::NotACodeObject(format!("{:?}", o))),
+        }
+    }
+
+    fn run_code(&mut self, code: Code) -> Result<ObjectRef, ProcessorError> {
+        let bytecode: Vec<u8> = code.code;
+        let instructions: Vec<Instruction> = instructions::InstructionDecoder::new(bytecode.iter()).into_iter().collect();
+        let mut program_counter = 0 as usize;
+        let mut stack = VectorStack::new();
+        while true {
+            let instruction = try!(instructions.get(program_counter).ok_or(ProcessorError::InvalidProgramCounter));
+            program_counter += 1;
+            match *instruction {
+                Instruction::PopTop => {
+                    try!(stack.pop().ok_or(ProcessorError::StackTooSmall));
+                    ()
+                },
+                Instruction::ReturnValue => return Ok(try!(stack.pop().ok_or(ProcessorError::StackTooSmall))),
+                Instruction::LoadConst(i) => stack.push(try!(code.consts.get(i).ok_or(ProcessorError::InvalidConstIndex)).clone()),
+                Instruction::LoadName(i) => {
+                    let name = try!(code.names.get(i).ok_or(ProcessorError::InvalidNameIndex)).clone();
+                    stack.push(try!(self.load_name(name)))
+                }
+                Instruction::CallFunction(nb_args, nb_kwargs) => {
+                    // See “Call constructs” at:
+                    // http://security.coverity.com/blog/2014/Nov/understanding-python-bytecode.html
+                    let kwargs = try!(stack.pop_many(nb_kwargs*2).ok_or(ProcessorError::StackTooSmall));
+                    let args = try!(stack.pop_many(nb_args).ok_or(ProcessorError::StackTooSmall));
+                    let func = try!(stack.pop().ok_or(ProcessorError::StackTooSmall));
+                    let ret_value = self.call_function(&func, args, kwargs);
+                    stack.push(try!(ret_value))
+                }
+                _ => panic!(format!("todo: instruction {:?}", *instruction)),
+            }
+        };
+        panic!("Unreachable")
+    }
+
+    pub fn run_code_object(&mut self, module: ObjectRef) -> Result<ObjectRef, ProcessorError> {
+        let code = match self.store.deref(&module).content {
+            ObjectContent::Code(ref code) => code.clone(),
+            ref o => return Err(ProcessorError::NotACodeObject(format!("{:?}", o))),
+        };
+        self.run_code(code)
+    }
 }

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1,5 +1,3 @@` |
| `1` | | `-use std::str::Bytes;` |
| `2` | | `-` |
| `3` | `1` | `#[derive(PartialEq)]` |
| `4` | `2` | `#[derive(Debug)]` |
| `5` | `3` | `pub enum Instruction {` |