Publications
2019 |
Brilli, Gianluca; Burgio, Paolo An open source research framework for IoT-capable smart traffic lights Inproceedings ACM International Conference Proceeding Series, pp. 165–170, Association for Computing Machinery, 2019, ISBN: 978-1-4503-6261-0. Links | BibTeX | Tags: Datasets, Gaze detection, Neural networks, Text tagging @inproceedings{brilli_open_2019, title = {An open source research framework for IoT-capable smart traffic lights}, author = {Brilli, Gianluca and Burgio, Paolo}, url = {http://portal.acm.org/}, doi = {10.1145/3342428.3342692}, isbn = {978-1-4503-6261-0}, year = {2019}, date = {2019-01-01}, booktitle = {ACM International Conference Proceeding Series}, pages = {165--170}, publisher = {Association for Computing Machinery}, keywords = {Datasets, Gaze detection, Neural networks, Text tagging}, pubstate = {published}, tppubtype = {inproceedings} } |
2018 |
Olmedo, Ignacio Sanudo; Capodieci, Nicola; Cavicchioli, Roberto A Perspective on Safety and Real-Time Issues for GPU Accelerated ADAS Inproceedings IECON 2018 - 44th Annual Conference of the IEEE Industrial Electronics Society, pp. 4071–4077, 2018. @inproceedings{sanudo_olmedo_perspective_2018, title = {A Perspective on Safety and Real-Time Issues for GPU Accelerated ADAS}, author = {Ignacio Sanudo Olmedo and Nicola Capodieci and Roberto Cavicchioli}, doi = {10.1109/IECON.2018.8591540}, year = {2018}, date = {2018-01-01}, booktitle = {IECON 2018 - 44th Annual Conference of the IEEE Industrial Electronics Society}, pages = {4071--4077}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Olmedo, Ignacio Sanudo; Cavicchioli, Roberto; Capodieci, Nicola; Valente, Paolo; Bertogna, Marko A survey on shared disk I/O management in virtualized environments under real time constraints Journal Article SIGBED REVIEW, 15 , pp. 57–63, 2018. Abstract | Links | BibTeX | Tags: @article{sanudo_olmedo_survey_2018, title = {A survey on shared disk I/O management in virtualized environments under real time constraints}, author = {Ignacio Sanudo Olmedo and Roberto Cavicchioli and Nicola Capodieci and Paolo Valente and Marko Bertogna}, doi = {10.1145/3199610.3199618}, year = {2018}, date = {2018-01-01}, journal = {SIGBED REVIEW}, volume = {15}, pages = {57--63}, abstract = {In the embedded systems domain, hypervisors are increasingly being adopted to guarantee timing isolation and appropriate hardware resource sharing among different software components. However, managing concurrent and parallel requests to shared hardware resources in a predictable way still represents an open issue. We argue that hypervisors can be an effective means to achieve an efficient and predictable arbitration of competing requests to shared devices in order to satisfy real-time requirements. As a representative example, we consider the case for mass storage (I/O) devices like Hard Disk Drives (HDD) and Solid State Disks (SSD), whose access times are orders of magnitude higher than those of central memory and CPU caches, therefore having a greater impact on overall task delays. We provide a comprehensive and up-to-date survey of the literature on I/O management within virtualized environments, focusing on software solutions proposed in the open source community, and discussing their main limitations in terms of realtime performance. 
Then, we discuss how the research in this subject may evolve in the future, highlighting the importance of techniques that are focused on scheduling not uniquely the processing bandwidth, but also the access to other important shared resources, like I/O devices.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In the embedded systems domain, hypervisors are increasingly being adopted to guarantee timing isolation and appropriate hardware resource sharing among different software components. However, managing concurrent and parallel requests to shared hardware resources in a predictable way still represents an open issue. We argue that hypervisors can be an effective means to achieve an efficient and predictable arbitration of competing requests to shared devices in order to satisfy real-time requirements. As a representative example, we consider the case for mass storage (I/O) devices like Hard Disk Drives (HDD) and Solid State Disks (SSD), whose access times are orders of magnitude higher than those of central memory and CPU caches, therefore having a greater impact on overall task delays. We provide a comprehensive and up-to-date survey of the literature on I/O management within virtualized environments, focusing on software solutions proposed in the open source community, and discussing their main limitations in terms of realtime performance. Then, we discuss how the research in this subject may evolve in the future, highlighting the importance of techniques that are focused on scheduling not uniquely the processing bandwidth, but also the access to other important shared resources, like I/O devices. |
Capodieci, Nicola; Cavicchioli, Roberto; Bertogna, Marko Work-in-Progress: NVIDIA GPU Scheduling Details in Virtualized Environments Inproceedings 2018 International Conference on Embedded Software (EMSOFT), 2018. @inproceedings{capodieci_work--progress_2018, title = {Work-in-Progress: NVIDIA GPU Scheduling Details in Virtualized Environments}, author = {Nicola Capodieci and Roberto Cavicchioli and Marko Bertogna}, doi = {10.1109/EMSOFT.2018.8537220}, year = {2018}, date = {2018-01-01}, booktitle = {2018 International Conference on Embedded Software (EMSOFT)}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Cavicchioli,; Capodieci,; Bertogna, Memory interference characterization between CPU cores and integrated GPUs in mixed-criticality platforms Inproceedings IEEE International Conference on Emerging Technologies and Factory Automation, ETFA, pp. 1–10, Institute of Electrical and Electronics Engineers Inc., 2018, ISBN: 978-1-5090-6505-9. Abstract | Links | BibTeX | Tags: @inproceedings{cavicchioli_memory_2018, title = {Memory interference characterization between CPU cores and integrated GPUs in mixed-criticality platforms}, author = {R Cavicchioli and N Capodieci and M Bertogna}, url = {http://ieeexplore.ieee.org/xpl/conhome.jsp?punumber=1000260}, doi = {10.1109/ETFA.2017.8247615}, isbn = {978-1-5090-6505-9}, year = {2018}, date = {2018-01-01}, booktitle = {IEEE International Conference on Emerging Technologies and Factory Automation, ETFA}, pages = {1--10}, publisher = {Institute of Electrical and Electronics Engineers Inc.}, abstract = {Most of today’s mixed criticality platforms feature Systems on Chip (SoC) where a multi-core CPU complex (the host) competes with an integrated Graphic Processor Unit (iGPU, the device) for accessing central memory. The multi-core host and the iGPU share the same memory controller, which has to arbitrate data access to both clients through often undisclosed or non-priority driven mechanisms. Such aspect becomes critical when the iGPU is a high performance massively parallel computing complex potentially able to saturate the available DRAM bandwidth of the considered SoC. The contribution of this paper is to qualitatively analyze and characterize the conflicts due to parallel accesses to main memory by both CPU cores and iGPU, so to motivate the need of novel paradigms for memory centric scheduling mechanisms. 
We analyzed different well known and commercially available platforms in order to estimate variations in throughput and latencies within various memory access patterns, both at host and device side.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Most of today’s mixed criticality platforms feature Systems on Chip (SoC) where a multi-core CPU complex (the host) competes with an integrated Graphic Processor Unit (iGPU, the device) for accessing central memory. The multi-core host and the iGPU share the same memory controller, which has to arbitrate data access to both clients through often undisclosed or non-priority driven mechanisms. Such aspect becomes critical when the iGPU is a high performance massively parallel computing complex potentially able to saturate the available DRAM bandwidth of the considered SoC. The contribution of this paper is to qualitatively analyze and characterize the conflicts due to parallel accesses to main memory by both CPU cores and iGPU, so to motivate the need of novel paradigms for memory centric scheduling mechanisms. We analyzed different well known and commercially available platforms in order to estimate variations in throughput and latencies within various memory access patterns, both at host and device side. |
Brilli, Gianluca; Burgio, Paolo; Bertogna, Marko Convolutional Neural Networks on Embedded Automotive Platforms: A Qualitative Comparison Inproceedings 2018 International Conference on High Performance Computing Simulation (HPCS), pp. 496–499, 2018. Links | BibTeX | Tags: application domains, computer vision, computer vision applications, computer vision object detection, computing system, convolution, convolutional neural networks, embedded automotive platforms, Embedded systems, feedforward neural nets, field programmable gate arrays, Graphics processing units, object detection, Power demand, qualitative comparison, Real-time systems, system-on-chip, Task analysis @inproceedings{brilli_convolutional_2018, title = {Convolutional Neural Networks on Embedded Automotive Platforms: A Qualitative Comparison}, author = {Gianluca Brilli and Paolo Burgio and Marko Bertogna}, doi = {10.1109/HPCS.2018.00084}, year = {2018}, date = {2018-01-01}, booktitle = {2018 International Conference on High Performance Computing Simulation (HPCS)}, pages = {496--499}, keywords = {application domains, computer vision, computer vision applications, computer vision object detection, computing system, convolution, convolutional neural networks, embedded automotive platforms, Embedded systems, feedforward neural nets, field programmable gate arrays, Graphics processing units, object detection, Power demand, qualitative comparison, Real-time systems, system-on-chip, Task analysis}, pubstate = {published}, tppubtype = {inproceedings} } |
2017 |
Burgio, Paolo; Bertogna, Marko; Capodieci, Nicola; Cavicchioli, Roberto; Sojka, Michal; Houdek, Přemysl; Marongiu, Andrea; Gai, Paolo; Scordino, Claudio; Morelli, Bruno A software stack for next-generation automotive systems on many-core heterogeneous platforms Journal Article MICROPROCESSORS AND MICROSYSTEMS, pp. 299–311, 2017. @article{burgio_software_2017, title = {A software stack for next-generation automotive systems on many-core heterogeneous platforms}, author = {Paolo Burgio and Marko Bertogna and Nicola Capodieci and Roberto Cavicchioli and Michal Sojka and Přemysl Houdek and Andrea Marongiu and Paolo Gai and Claudio Scordino and Bruno Morelli}, year = {2017}, date = {2017-01-01}, journal = {MICROPROCESSORS AND MICROSYSTEMS}, pages = {299--311}, abstract = {The next-generation of partially and fully autonomous cars will be powered by embedded many-core platforms. Technologies for Advanced Driver Assistance Systems (ADAS) need to process an unprecedented amount of data within tight power budgets, making those platform the ideal candidate architecture. Integrating tens-to-hundreds of computing elements that run at lower frequencies allows obtaining impressive performance capabilities at a reduced power consumption, that meets the size, weight and power (SWaP) budget of automotive systems. Unfortunately, the inherent architectural complexity of many-core platforms makes it almost impossible to derive real-time guarantees using “traditional” state-of-the-art techniques, ultimately preventing their adoption in real industrial settings. Having impressive average performances with no guaranteed bounds on the response times of the critical computing activities is of little if no use in safety-critical applications. Project Hercules will address this issue, and provide the required technological infrastructure to exploit the tremendous potential of embedded many-cores for the next generation of automotive systems. 
This work gives an overview of the integrated Hercules software framework, which allows achieving an order-of-magnitude of predictable performance on top of cutting-edge Commercial-Off-The-Shelf components (COTS). The proposed software stack will let both real-time and non real-time application coexist on next-generation, power-efficient embedded platforms, with preserved timing guarantees.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The next-generation of partially and fully autonomous cars will be powered by embedded many-core platforms. Technologies for Advanced Driver Assistance Systems (ADAS) need to process an unprecedented amount of data within tight power budgets, making those platform the ideal candidate architecture. Integrating tens-to-hundreds of computing elements that run at lower frequencies allows obtaining impressive performance capabilities at a reduced power consumption, that meets the size, weight and power (SWaP) budget of automotive systems. Unfortunately, the inherent architectural complexity of many-core platforms makes it almost impossible to derive real-time guarantees using “traditional” state-of-the-art techniques, ultimately preventing their adoption in real industrial settings. Having impressive average performances with no guaranteed bounds on the response times of the critical computing activities is of little if no use in safety-critical applications. Project Hercules will address this issue, and provide the required technological infrastructure to exploit the tremendous potential of embedded many-cores for the next generation of automotive systems. This work gives an overview of the integrated Hercules software framework, which allows achieving an order-of-magnitude of predictable performance on top of cutting-edge Commercial-Off-The-Shelf components (COTS). 
The proposed software stack will let both real-time and non real-time application coexist on next-generation, power-efficient embedded platforms, with preserved timing guarantees. |
Bertogna, Marko; Burgio, Paolo; Cabri, Giacomo; Capodieci, Nicola Adaptive coordination in autonomous driving: Motivations and perspectives Inproceedings Proceedings - 2017 IEEE 26th International Conference on Enabling Technologies: Infrastructure for Collaborative Enterprises, WETICE 2017, pp. 15–17, Institute of Electrical and Electronics Engineers Inc., 2017, ISBN: 978-1-5386-1758-8. Links | BibTeX | Tags: Adaptation, Autonomous driving, Business, Computer Networks and Communications, Hardware and Architecture, Management and Accounting (miscellaneous), Socio-technical systems @inproceedings{bertogna_adaptive_2017, title = {Adaptive coordination in autonomous driving: Motivations and perspectives}, author = {Marko Bertogna and Paolo Burgio and Giacomo Cabri and Nicola Capodieci}, doi = {10.1109/WETICE.2017.45}, isbn = {978-1-5386-1758-8}, year = {2017}, date = {2017-01-01}, booktitle = {Proceedings - 2017 IEEE 26th International Conference on Enabling Technologies: Infrastructure for Collaborative Enterprises, WETICE 2017}, pages = {15--17}, publisher = {Institute of Electrical and Electronics Engineers Inc.}, keywords = {Adaptation, Autonomous driving, Business, Computer Networks and Communications, Hardware and Architecture, Management and Accounting (miscellaneous), Socio-technical systems}, pubstate = {published}, tppubtype = {inproceedings} } |
Capodieci, Nicola; Cavicchioli, Roberto; Valente, Paolo; Bertogna, Marko SiGAMMA: Server based integrated GPU arbitration mechanism for memory accesses Inproceedings ACM International Conference Proceeding Series, pp. 48–57, Association for Computing Machinery, 2017, ISBN: 978-1-4503-5286-4. Links | BibTeX | Tags: 1707, Computer Networks and Communications, GP-GPU, Human-Computer Interaction, Memory-Centric scheduling, PREM, Software @inproceedings{capodieci_sigamma_2017, title = {SiGAMMA: Server based integrated GPU arbitration mechanism for memory accesses}, author = {Nicola Capodieci and Roberto Cavicchioli and Paolo Valente and Marko Bertogna}, url = {http://portal.acm.org/}, doi = {10.1145/3139258.3139270}, isbn = {978-1-4503-5286-4}, year = {2017}, date = {2017-01-01}, booktitle = {ACM International Conference Proceeding Series}, volume = {131837}, pages = {48--57}, publisher = {Association for Computing Machinery}, keywords = {1707, Computer Networks and Communications, GP-GPU, Human-Computer Interaction, Memory-Centric scheduling, PREM, Software}, pubstate = {published}, tppubtype = {inproceedings} } |
Farahani, Ali; Nazemi, Eslam; Cabri, Giacomo; Capodieci, Nicola Enabling Autonomic Computing Support for the JADE Agent Platform Journal Article SCALABLE COMPUTING. PRACTICE AND EXPERIENCE, 18 , pp. 91–103, 2017. Links | BibTeX | Tags: Autonomic computing, Development Environment, JADE, Multi-agent Systems @article{farahani_enabling_2017, title = {Enabling Autonomic Computing Support for the JADE Agent Platform}, author = {Ali Farahani and Eslam Nazemi and Giacomo Cabri and Nicola Capodieci}, url = {http://www.scpe.org/index.php/scpe/article/view/1235/499}, doi = {10.12694/scpe.v18i1.1235}, year = {2017}, date = {2017-01-01}, journal = {SCALABLE COMPUTING. PRACTICE AND EXPERIENCE}, volume = {18}, pages = {91--103}, keywords = {Autonomic computing, Development Environment, JADE, Multi-agent Systems}, pubstate = {published}, tppubtype = {article} } |
2016 |
Capodieci, Nicola; Pagani, Giuliano Andrea; Cabri, Giacomo; Aiello, Marco An adaptive agent-based system for deregulated smart grids Journal Article SERVICE ORIENTED COMPUTING AND APPLICATIONS, 10 , pp. 185–205, 2016. Links | BibTeX | Tags: Energy market, Smart Grids, Software agents @article{capodieci_adaptive_2016, title = {An adaptive agent-based system for deregulated smart grids}, author = {Nicola Capodieci and Giuliano Andrea Pagani and Giacomo Cabri and Marco Aiello}, url = {http://link.springer.com/article/10.1007%2Fs11761-015-0180-3}, doi = {10.1007/s11761-015-0180-3}, year = {2016}, date = {2016-01-01}, journal = {SERVICE ORIENTED COMPUTING AND APPLICATIONS}, volume = {10}, pages = {185--205}, keywords = {Energy market, Smart Grids, Software agents}, pubstate = {published}, tppubtype = {article} } |
Galassi, Marco; Capodieci, Nicola; Cabri, Giacomo; Leonardi, Letizia Evolutionary Strategies for Novelty-Based Online Neuroevolution in Swarm Robotics Inproceedings Proceedings of the 2016 IEEE International Conference on Systems, Man, and Cybernetics, pp. 2026–2032, IEEE, 2016, ISBN: 978-1-5090-1897-0. Links | BibTeX | Tags: Distributed evolutionary algorithms, Dynamics of evolution, Embedded neural networks, Evolutionary strategies, Information sharing, Inter-robot communication, Neuro evolutions, Objective functions @inproceedings{galassi_evolutionary_2016, title = {Evolutionary Strategies for Novelty-Based Online Neuroevolution in Swarm Robotics}, author = {Marco Galassi and Nicola Capodieci and Giacomo Cabri and Letizia Leonardi}, doi = {10.1109/SMC.2016.7844538}, isbn = {978-1-5090-1897-0}, year = {2016}, date = {2016-01-01}, booktitle = {Proceedings of the 2016 IEEE International Conference on Systems, Man, and Cybernetics}, pages = {2026--2032}, publisher = {IEEE}, keywords = {Distributed evolutionary algorithms, Dynamics of evolution, Embedded neural networks, Evolutionary strategies, Information sharing, Inter-robot communication, Neuro evolutions, Objective functions}, pubstate = {published}, tppubtype = {inproceedings} } |
Capodieci, Nicola; Hart, Emma; Cabri, Giacomo Artificial immunology for collective adaptive systems design and implementation Journal Article ACM TRANSACTIONS ON AUTONOMOUS AND ADAPTIVE SYSTEMS, 11 , pp. 1–25, 2016. Links | BibTeX | Tags: Artificial immune system, Autonomic computing, Computer Science (miscellaneous), Control and Systems Engineering, Framework, Software @article{capodieci_artificial_2016, title = {Artificial immunology for collective adaptive systems design and implementation}, author = {Nicola Capodieci and Emma Hart and Giacomo Cabri}, url = {http://dl.acm.org/citation.cfm?doid=2952298.2897372}, doi = {10.1145/2897372}, year = {2016}, date = {2016-01-01}, journal = {ACM TRANSACTIONS ON AUTONOMOUS AND ADAPTIVE SYSTEMS}, volume = {11}, pages = {1--25}, keywords = {Artificial immune system, Autonomic computing, Computer Science (miscellaneous), Control and Systems Engineering, Framework, Software}, pubstate = {published}, tppubtype = {article} } |
Burgio, Paolo; Bertogna, Marko; Olmedo, Ignacio Sanudo; Gai, Paolo; Marongiu, Andrea; Sojka, Michal A Software Stack for Next-Generation Automotive Systems on Many-Core Heterogeneous Platforms Inproceedings Proceedings of the 9th Euromicro Conference on Digital System Design (DSD 2016), pp. 55–59, 2016, ISBN: 978-1-5090-2817-7. @inproceedings{burgio_software_2016, title = {A Software Stack for Next-Generation Automotive Systems on Many-Core Heterogeneous Platforms}, author = {Paolo Burgio and Marko Bertogna and Ignacio Sanudo Olmedo and Paolo Gai and Andrea Marongiu and Michal Sojka}, doi = {10.1109/DSD.2016.84}, isbn = {978-1-5090-2817-7}, year = {2016}, date = {2016-01-01}, booktitle = {Proceedings of the 9th Euromicro Conference on Digital System Design (DSD 2016)}, pages = {55--59}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2015 |
Capodieci, Nicola; Burgio, Paolo Efficient Implementation of Genetic Algorithms on GP-GPU with Scheduled Persistent CUDA Threads Inproceedings Seventh International Symposium on Parallel Architectures, Algorithms, and Programming, PAAP 2015. Proceedings, pp. 6–12, IEEE - Computer Society, Los Alamitos CA, Washington, Tokyo, 2015, ISBN: 978-1-4673-9117-7. Links | BibTeX | Tags: CUDA, Genetic Algorithms, GP-GPUs, Persistent Threads @inproceedings{capodieci_efficient_2015, title = {Efficient Implementation of Genetic Algorithms on GP-GPU with Scheduled Persistent CUDA Threads}, author = {Nicola Capodieci and Paolo Burgio}, doi = {10.1109/PAAP.2015.13}, isbn = {978-1-4673-9117-7}, year = {2015}, date = {2015-01-01}, booktitle = {Seventh International Symposium on Parallel Architectures, Algorithms, and Programming, PAAP 2015. Proceedings}, pages = {6--12}, publisher = {IEEE - Computer Society}, address = {Los Alamitos CA, Washington, Tokyo}, keywords = {CUDA, Genetic Algorithms, GP-GPUs, Persistent Threads}, pubstate = {published}, tppubtype = {inproceedings} } |
Alsina, Emanuel Federico; Capodieci, Nicola; Cabri, Giacomo; Regattieri, Alberto; Gamberi, Mauro; Pilati, Francesco; Faccio, Maurizio The influence of the picking times of the components in time and space assembly line balancing problems: An approach with evolutionary algorithms Inproceedings 2015 IEEE Symposium Series on Computational Intelligence, SSCI 2015: Proceedings, pp. 1021–1028, IEEE - Institute of Electrical and Electronics Engineers Inc., Piscataway, 2015, ISBN: 978-1-4799-7560-0. Links | BibTeX | Tags: assembling, Evolutionary Computation, Genetic Algorithms @inproceedings{alsina_influence_2015, title = {The influence of the picking times of the components in time and space assembly line balancing problems: An approach with evolutionary algorithms}, author = {Emanuel Federico Alsina and Nicola Capodieci and Giacomo Cabri and Alberto Regattieri and Mauro Gamberi and Francesco Pilati and Maurizio Faccio}, doi = {10.1109/SSCI.2015.148}, isbn = {978-1-4799-7560-0}, year = {2015}, date = {2015-01-01}, booktitle = {2015 IEEE Symposium Series on Computational Intelligence, SSCI 2015: Proceedings}, pages = {1021--1028}, publisher = {IEEE - Institute of Electrical and Electronics Engineers Inc.}, address = {Piscataway}, keywords = {assembling, Evolutionary Computation, Genetic Algorithms}, pubstate = {published}, tppubtype = {inproceedings} } |
Puviani, Mariachiara; Cabri, Giacomo; Capodieci, Nicola; Leonardi, Letizia Building self-adaptive systems by adaptation patterns integrated into agent methodologies Incollection 7th International Conference on Agents and Artificial Intelligence, ICAART 2015, 9494 , pp. 58–75, Springer, Heidelberg, 2015, ISBN: 978-3-319-27946-6. Links | BibTeX | Tags: Adaptation pattern, Computer Science (all), Methodology, Multi-agent system, Theoretical Computer Science @incollection{puviani_building_2015, title = {Building self-adaptive systems by adaptation patterns integrated into agent methodologies}, author = {Mariachiara Puviani and Giacomo Cabri and Nicola Capodieci and Letizia Leonardi}, doi = {10.1007/978-3-319-27947-3_4}, isbn = {978-3-319-27946-6}, year = {2015}, date = {2015-01-01}, booktitle = {7th International Conference on Agents and Artificial Intelligence, ICAART 2015}, volume = {9494}, pages = {58--75}, publisher = {Springer}, address = {Heidelberg}, keywords = {Adaptation pattern, Computer Science (all), Methodology, Multi-agent system, Theoretical Computer Science}, pubstate = {published}, tppubtype = {incollection} } |
Sereni, G.; Vandelli, L.; Cavicchioli, R.; Larcher, L.; Veksler, D.; Bersuker, G. Substrate and temperature influence on the trap density distribution in high-k III-V MOSFETs Inproceedings IEEE International Reliability Physics Symposium Proceedings, pp. E61–2E65, Institute of Electrical and Electronics Engineers Inc., 2015, ISBN: 978-1-4673-7362-3. Abstract | Links | BibTeX | Tags: C-V and G-V curves, high-k, III-V, InGaAs, interface and border traps, modeling and simulations @inproceedings{sereni_substrate_2015, title = {Substrate and temperature influence on the trap density distribution in high-k III-V MOSFETs}, author = {G Sereni and L Vandelli and R Cavicchioli and L Larcher and D Veksler and G Bersuker}, url = {http://ieeexplore.ieee.org/xpl/conhome.jsp?punumber=1000627}, doi = {10.1109/IRPS.2015.7112690}, isbn = {978-1-4673-7362-3}, year = {2015}, date = {2015-01-01}, booktitle = {IEEE International Reliability Physics Symposium Proceedings}, volume = {2015-}, pages = {E61--2E65}, publisher = {Institute of Electrical and Electronics Engineers Inc.}, abstract = {In this work we apply a new spectroscopic technique based on the simulation of capacitance and conductance measurements to investigate the defect density in high-κ/III-V MOSFETs. This technique exploits the simulation of C-V and G-V curves measured over a wide frequency range to extract the defect density map in the energy-position domain. The technique was used to investigate the role of the substrate material and the temperature on the interfacial and bulk defect distributions in the gate stack in InGaAs MOS capacitors grown on both InP and Si substrate. It was found that the substrate material does not affect the defect density in the gate dielectric stack. 
Applying the technique to C-V and G-V curves measured at different temperatures allows extracting the relaxation energy of defects, an important parameter connected to their atomic nature.}, keywords = {C-V and G-V curves, high-k, III-V, InGaAs, interface and border traps, modeling and simulations}, pubstate = {published}, tppubtype = {inproceedings} } In this work we apply a new spectroscopic technique based on the simulation of capacitance and conductance measurements to investigate the defect density in high-κ/III-V MOSFETs. This technique exploits the simulation of C-V and G-V curves measured over a wide frequency range to extract the defect density map in the energy-position domain. The technique was used to investigate the role of the substrate material and the temperature on the interfacial and bulk defect distributions in the gate stack in InGaAs MOS capacitors grown on both InP and Si substrate. It was found that the substrate material does not affect the defect density in the gate dielectric stack. Applying the technique to C-V and G-V curves measured at different temperatures allows extracting the relaxation energy of defects, an important parameter connected to their atomic nature. |
Pinho, Luís Miguel; Nélis, Vincent; Yomsi, Patrick Meumeu; Quiñones, Eduardo; Bertogna, Marko; Burgio, Paolo; Marongiu, Andrea; Scordino, Claudio; Gai, Paolo; Ramponi, Michele; Mardiak, Michal P-SOCRATES: A parallel software framework for time-critical many-core systems Journal Article MICROPROCESSORS AND MICROSYSTEMS, 39 , pp. 1190–1203, 2015. Links | BibTeX | Tags: Artificial Intelligence, Computer Networks and Communications, Embedded systems, Hardware and Architecture, many-core systems, Parallel programming models, Real-time scheduling, Real-time systems, Software, WCET analysis @article{pinho_p-socrates_2015, title = {P-SOCRATES: A parallel software framework for time-critical many-core systems}, author = {Luís Miguel Pinho and Vincent Nélis and Patrick Meumeu Yomsi and Eduardo Quiñones and Marko Bertogna and Paolo Burgio and Andrea Marongiu and Claudio Scordino and Paolo Gai and Michele Ramponi and Michal Mardiak}, url = {http://www.sciencedirect.com/science/article/pii/S0141933115000836}, doi = {10.1016/j.micpro.2015.06.004}, year = {2015}, date = {2015-01-01}, journal = {MICROPROCESSORS AND MICROSYSTEMS}, volume = {39}, pages = {1190--1203}, keywords = {Artificial Intelligence, Computer Networks and Communications, Embedded systems, Hardware and Architecture, many-core systems, Parallel programming models, Real-time scheduling, Real-time systems, Software, WCET analysis}, pubstate = {published}, tppubtype = {article} } |
Burgio, Paolo; Marongiu, Andrea; Valente, Paolo; Bertogna, Marko A memory-centric approach to enable timing-predictability within embedded many-core accelerators Inproceedings CSI Symposium on Real-Time and Embedded Systems and Technologies, RTEST 2015, pp. 1–8, IEEE - Institute of Electrical and Electronics Engineers Inc., Piscataway, NJ, 2015, ISBN: 978-1-4673-8047-8. Links | BibTeX | Tags: Hardware and Architecture @inproceedings{burgio_memory-centric_2015, title = {A memory-centric approach to enable timing-predictability within embedded many-core accelerators}, author = {Paolo Burgio and Andrea Marongiu and Paolo Valente and Marko Bertogna}, doi = {10.1109/RTEST.2015.7369851}, isbn = {978-1-4673-8047-8}, year = {2015}, date = {2015-01-01}, booktitle = {CSI Symposium on Real-Time and Embedded Systems and Technologies, RTEST 2015}, pages = {1--8}, publisher = {IEEE - Institute of Electrical and Electronics Engineers Inc.}, address = {Piscataway, NJ}, keywords = {Hardware and Architecture}, pubstate = {published}, tppubtype = {inproceedings} } |
2014 |
Capodieci,; Hart,; Cabri, Idiotypic networks for evolutionary controllers in virtual creatures Inproceedings Artificial Life 14: Proceedings of the 14th International Conference on the Simulation and Synthesis of Living Systems (Alife 14), pp. 192–199, MIT Press, Cambridge, 2014, ISBN: 978-0-262-32621-6. Abstract | BibTeX | Tags: Autonomic computing, Evolutionary Computation @inproceedings{capodieci_idiotypic_2014, title = {Idiotypic networks for evolutionary controllers in virtual creatures}, author = {N Capodieci and E Hart and G Cabri}, isbn = {978-0-262-32621-6}, year = {2014}, date = {2014-01-01}, booktitle = {Artificial Life 14: Proceedings of the 14th International Conference on the Simulation and Synthesis of Living Systems (Alife 14)}, pages = {192--199}, publisher = {MIT Press}, address = {Cambridge}, abstract = {We propose a novel method for evolving adaptive locomotive strategies for virtual limbless creatures that addresses both functional and non-functional requirements, respectively the ability to avoid obstacles and to minimise spent energy. We describe an approach inspired by artificial immune systems, based on a dual-layer idiotypic network that results in a completely decentralised controller. Results are compared to a set of five fixed locomotion strategies and show that adaptive control can evolve and simultaneously optimise energy requirements, starting from the same locomotion non-adaptive strategies.}, keywords = {Autonomic computing, Evolutionary Computation}, pubstate = {published}, tppubtype = {inproceedings} } We propose a novel method for evolving adaptive locomotive strategies for virtual limbless creatures that addresses both functional and non-functional requirements, respectively the ability to avoid obstacles and to minimise spent energy. We describe an approach inspired by artificial immune systems, based on a dual-layer idiotypic network that results in a completely decentralised controller. 
Results are compared to a set of five fixed locomotion strategies and show that adaptive control can evolve and simultaneously optimise energy requirements, starting from the same locomotion non-adaptive strategies. |
Cabri, Giacomo; Capodieci, Nicola; Cesari, Luca; De Nicola, Rocco; Pugliese, Rosario; Tiezzi, Francesco; Zambonelli, Franco Self-Expression and Dynamic Attribute-based Ensembles in SCEL Inproceedings Proceedings of 6th International Symposium on Leveraging Applications (ISoLA 2014), pp. 147–163, 2014. Abstract | BibTeX | Tags: Autonomic computing @inproceedings{giacomo_self-expression_2014, title = {Self-Expression and Dynamic Attribute-based Ensembles in SCEL}, author = {Cabri, Giacomo and Capodieci, Nicola and Cesari, Luca and De Nicola, Rocco and Pugliese, Rosario and Tiezzi, Francesco and Zambonelli, Franco}, year = {2014}, date = {2014-01-01}, booktitle = {Proceedings of 6th International Symposium on Leveraging Applications (ISoLA 2014)}, volume = {8802}, pages = {147--163}, abstract = {In the field of distributed autonomous computing the current trend is to develop cooperating computational entities enabled with enhanced self-* properties. The expression self-* indicates the possibility of an element inside an ensemble, i.e. a set of collaborative autonomic components, to self organize, heal (repair), optimize and configure with little or no human interaction. We focus on a self-* property called Self-Expression, defined as the ability to deploy run-time changes of the coordination pattern of the observed ensemble; the goal of the ensemble is to achieve adaptivity by meeting functional and non-functional requirements when specific tasks have to be completed. The purpose of this paper is to rigorously present the mechanisms involved whenever a change in the coordination pattern is needed, and the interactions that take place. 
To this aim, we use SCEL (Software Component Ensemble Language), a formal language for describing autonomic components and their interactions, featuring a highly dynamic and flexible way to form ensembles based on components' attributes.}, keywords = {Autonomic computing}, pubstate = {published}, tppubtype = {inproceedings} } In the field of distributed autonomous computing the current trend is to develop cooperating computational entities enabled with enhanced self-* properties. The expression self-* indicates the possibility of an element inside an ensemble, i.e. a set of collaborative autonomic components, to self organize, heal (repair), optimize and configure with little or no human interaction. We focus on a self-* property called Self-Expression, defined as the ability to deploy run-time changes of the coordination pattern of the observed ensemble; the goal of the ensemble is to achieve adaptivity by meeting functional and non-functional requirements when specific tasks have to be completed. The purpose of this paper is to rigorously present the mechanisms involved whenever a change in the coordination pattern is needed, and the interactions that take place. To this aim, we use SCEL (Software Component Ensemble Language), a formal language for describing autonomic components and their interactions, featuring a highly dynamic and flexible way to form ensembles based on components' attributes. |
Capodieci, N; Hart, E; Cabri, G Artificial Immune System driven evolution in Swarm Chemistry Inproceedings Proceedings of the 2014 IEEE Eighth International Conference on Self-Adaptive and Self-Organizing Systems, pp. 40–49, IEEE, Piscataway, NJ, 2014, ISBN: 978-1-4799-5367-7. Abstract | Links | BibTeX | Tags: Artificial Immune Systems, Autonomic computing, Swarm chemistry @inproceedings{capodieci_artificial_2014, title = {Artificial Immune System driven evolution in Swarm Chemistry}, author = {N Capodieci and E Hart and G Cabri}, doi = {10.1109/SASO.2014.16}, isbn = {978-1-4799-5367-7}, year = {2014}, date = {2014-01-01}, booktitle = {Proceedings of the 2014 IEEE Eighth International Conference on Self-Adaptive and Self-Organizing Systems}, pages = {40--49}, publisher = {IEEE}, address = {Piscataway, NJ}, abstract = {Morphogenetic engineering represents an interesting field in which models, frameworks and algorithms can be tested in order to study how self-* properties and emergent behaviours can arise in potentially complex and distributed systems. In this field, the morphogenetic model we will refer to is swarm chemistry, since a well known challenge in this dynamical process concerns discovering mechanisms for providing evolution within coalescing systems of particles. These systems consist in sets of moving particles able to self-organise in order to create shapes or geometrical formations that provide robustness towards external perturbations. We present a novel mechanism for providing evolutionary features in swarm chemistry that takes inspiration from artificial immune system literature, more specifically regarding idiotypic networks. 
Starting from a restricted set of chemical recipes, we show that the system evolves to new states, using an autonomous method of detecting new shapes and behaviours free from any human interaction.}, keywords = {Artificial Immune Systems, Autonomic computing, Swarm chemistry}, pubstate = {published}, tppubtype = {inproceedings} } Morphogenetic engineering represents an interesting field in which models, frameworks and algorithms can be tested in order to study how self-* properties and emergent behaviours can arise in potentially complex and distributed systems. In this field, the morphogenetic model we will refer to is swarm chemistry, since a well known challenge in this dynamical process concerns discovering mechanisms for providing evolution within coalescing systems of particles. These systems consist in sets of moving particles able to self-organise in order to create shapes or geometrical formations that provide robustness towards external perturbations. We present a novel mechanism for providing evolutionary features in swarm chemistry that takes inspiration from artificial immune system literature, more specifically regarding idiotypic networks. Starting from a restricted set of chemical recipes, we show that the system evolves to new states, using an autonomous method of detecting new shapes and behaviours free from any human interaction. |
Capodieci, N; Cabri, G; Zambonelli, F Modeling Self-Expression by Holons Inproceedings Proceedings of the 2014 International Conference on High Performance Computing & Simulation (HPCS 2014), pp. 424–431, IEEE, Los Alamitos, 2014, ISBN: 978-1-4799-5311-0. Abstract | BibTeX | Tags: Autonomic computing, Holonic frameworks @inproceedings{capodieci_modeling_2014, title = {Modeling Self-Expression by Holons}, author = {N Capodieci and G Cabri and F Zambonelli}, isbn = {978-1-4799-5311-0}, year = {2014}, date = {2014-01-01}, booktitle = {Proceedings of the 2014 International Conference on High Performance Computing & Simulation (HPCS 2014)}, pages = {424--431}, publisher = {IEEE}, address = {Los Alamitos}, abstract = {In the field of autonomic computing the current trend is to develop cooperating computational entities enabled with enhanced self-* properties. The term self-* indicates the possibility of an element inside a set (an ensemble) composed of many potentially heterogeneous components to self organize, heal (repair), optimize and configure with little or no human interaction. The goal is to increase the level of adaptivity for both the ensemble and the single element, especially in scenarios in which stopping a system for further tuning is unfeasible. In this paper, we propose an approach to model and enable the capability of adopting different collaboration patterns in ensembles of autonomic components inserted in open and nondeterministic environments. This model takes inspiration from the holonic organisation of multi agent systems and from a Self-* property, called Self-Expression, which is defined as the property of a distributed system to change its collaboration pattern at run time in order to better adapt its execution of tasks in unknown situations. 
Ensembles able to deploy Self-Expression, show a higher level of adaptation and the concept of Self-Expression can be easily exploited through a Self-Repeating structure like a hierarchy of holons.}, keywords = {Autonomic computing, Holonic frameworks}, pubstate = {published}, tppubtype = {inproceedings} } In the field of autonomic computing the current trend is to develop cooperating computational entities enabled with enhanced self-* properties. The term self-* indicates the possibility of an element inside a set (an ensemble) composed of many potentially heterogeneous components to self organize, heal (repair), optimize and configure with little or no human interaction. The goal is to increase the level of adaptivity for both the ensemble and the single element, especially in scenarios in which stopping a system for further tuning is unfeasible. In this paper, we propose an approach to model and enable the capability of adopting different collaboration patterns in ensembles of autonomic components inserted in open and nondeterministic environments. This model takes inspiration from the holonic organisation of multi agent systems and from a Self-* property, called Self-Expression, which is defined as the property of a distributed system to change its collaboration pattern at run time in order to better adapt its execution of tasks in unknown situations. Ensembles able to deploy Self-Expression, show a higher level of adaptation and the concept of Self-Expression can be easily exploited through a Self-Repeating structure like a hierarchy of holons. |
Capodieci, N; Hart, E; Cabri, G Artificial Immune System in the context of Autonomic Computing: integrating design paradigms Inproceedings Proceedings of the 2014 conference companion on Genetic and evolutionary computation companion, pp. 21–22, ACM, New York, 2014, ISBN: 978-1-4503-2881-4. Abstract | BibTeX | Tags: Artificial Immune Systems, Autonomic computing @inproceedings{capodieci_artificial_2014-1, title = {Artificial Immune System in the context of Autonomic Computing: integrating design paradigms}, author = {N Capodieci and E Hart and G Cabri}, isbn = {978-1-4503-2881-4}, year = {2014}, date = {2014-01-01}, booktitle = {Proceedings of the 2014 conference companion on Genetic and evolutionary computation companion}, volume = {1}, pages = {21--22}, publisher = {ACM}, address = {New York}, abstract = {We describe a framework for developing autonomic computing systems, based on an analogy with the natural immune system. A detailed comparison between the autonomic computing literature and the field of cognitive immune networks is drawn, particularly with respect to ensembles of components (in autonomic computing) and ensembles of cells (in immune systems). We show how current approaches to designing autonomic systems could be enriched by considering alternative design processes based on cognitive immune networks.}, keywords = {Artificial Immune Systems, Autonomic computing}, pubstate = {published}, tppubtype = {inproceedings} } We describe a framework for developing autonomic computing systems, based on an analogy with the natural immune system. A detailed comparison between the autonomic computing literature and the field of cognitive immune networks is drawn, particularly with respect to ensembles of components (in autonomic computing) and ensembles of cells (in immune systems). We show how current approaches to designing autonomic systems could be enriched by considering alternative design processes based on cognitive immune networks. |
Burgio, Paolo; Danilo, Robin; Marongiu, Andrea; Coussy, Philippe; Benini, Luca A tightly-coupled hardware controller to improve scalability and programmability of shared-memory heterogeneous clusters Inproceedings Proceedings -Design, Automation and Test in Europe, DATE, pp. 1–4, Institute of Electrical and Electronics Engineers Inc., 2014, ISBN: 978-3-9815370-2-4. Abstract | Links | BibTeX | Tags: Computer architecture Hardware Integrated circuit interconnections Optimization Ports (Computers) Programming System-on-chip @inproceedings{burgio_tightly-coupled_2014, title = {A tightly-coupled hardware controller to improve scalability and programmability of shared-memory heterogeneous clusters}, author = {Paolo Burgio and Robin Danilo and Andrea Marongiu and Philippe Coussy and Luca Benini}, doi = {10.7873/DATE2014.038}, isbn = {978-3-9815370-2-4}, year = {2014}, date = {2014-01-01}, booktitle = {Proceedings -Design, Automation and Test in Europe, DATE}, pages = {1--4}, publisher = {Institute of Electrical and Electronics Engineers Inc.}, abstract = {Modern designs for embedded many-core systems increasingly include application-specific units to accelerate key computational kernels with orders-of-magnitude higher execution speed and energy efficiency compared to software counterparts. A promising architectural template is based on heterogeneous clusters, where simple RISC cores and specialized HW units (HWPU) communicate in a tightly-coupled manner via L1 shared memory. Efficiently integrating processors and a high number of HW Processing Units (HWPUs) in such a system poses two main challenges, namely, architectural scalability and programmability. In this paper we describe an optimized Data Pump (DP) which connects several accelerators to a restricted set of communication ports, and acts as a virtualization layer for programming, exposing FIFO queues to offload “HW tasks” to them through a set of lightweight APIs. 
In this work, we aim at optimizing both these mechanisms, for respectively reducing modules area and making programming sequence easier and lighter.}, keywords = {Computer architecture Hardware Integrated circuit interconnections Optimization Ports (Computers) Programming System-on-chip}, pubstate = {published}, tppubtype = {inproceedings} } Modern designs for embedded many-core systems increasingly include application-specific units to accelerate key computational kernels with orders-of-magnitude higher execution speed and energy efficiency compared to software counterparts. A promising architectural template is based on heterogeneous clusters, where simple RISC cores and specialized HW units (HWPU) communicate in a tightly-coupled manner via L1 shared memory. Efficiently integrating processors and a high number of HW Processing Units (HWPUs) in such a system poses two main challenges, namely, architectural scalability and programmability. In this paper we describe an optimized Data Pump (DP) which connects several accelerators to a restricted set of communication ports, and acts as a virtualization layer for programming, exposing FIFO queues to offload “HW tasks” to them through a set of lightweight APIs. In this work, we aim at optimizing both these mechanisms, for respectively reducing modules area and making programming sequence easier and lighter. |
Burgio, Paolo; Tagliavini, Giuseppe; Conti, Francesco; Marongiu, Andrea; Benini, Luca Tightly-coupled hardware support to dynamic parallelism acceleration in embedded shared memory clusters Inproceedings Design, Automation and Test in Europe Conference and Exhibition (DATE), 2014, pp. 1–6, 2014, ISBN: 978-3-9815370-2-4. Abstract | Links | BibTeX | Tags: Benchmarking, Computer programming, Design, Embedded systems, Engineering controlled terms: Application programming interfaces (API), Fine-grained parallelism, Hardware, Hardware implementations, Modern applications, Multi-core cluster, Parallel architectures, Programming abstractions, Runtime environments, Semantics Cluster-based architecture, Synthetic benchmark Engineering main heading: Scheduling @inproceedings{burgio_tightly-coupled_2014-1, title = {Tightly-coupled hardware support to dynamic parallelism acceleration in embedded shared memory clusters}, author = {Paolo Burgio and Giuseppe Tagliavini and Francesco Conti and Andrea Marongiu and Luca Benini}, doi = {10.7873/DATE.2014.169}, isbn = {978-3-9815370-2-4}, year = {2014}, date = {2014-01-01}, booktitle = {Design, Automation and Test in Europe Conference and Exhibition (DATE), 2014}, pages = {1--6}, abstract = {Modern designs for embedded systems are increasingly embracing cluster-based architectures, where small sets of cores communicate through tightly-coupled shared memory banks and high-performance interconnections. At the same time, the complexity of modern applications requires new programming abstractions to exploit dynamic and/or irregular parallelism on such platforms. Supporting dynamic parallelism in systems which i) are resource-constrained and ii) run applications with small units of work calls for a runtime environment which has minimal overhead for the scheduling of parallel tasks. 
In this work, we study the major sources of overhead in the implementation of OpenMP dynamic loops, sections and tasks, and propose a hardware implementation of a generic Scheduling Engine (HWSE) which fits the semantics of the three constructs. The HWSE is designed as a tightly-coupled block to the PEs within a multi-core cluster, communicating through a shared-memory interface. This allows very fast programming and synchronization with the controlling PEs, fundamental to achieving fast dynamic scheduling, and ultimately to enable fine-grained parallelism. We prove the effectiveness of our solutions with real applications and synthetic benchmarks, using a cycle-accurate virtual platform.}, keywords = {Benchmarking, Computer programming, Design, Embedded systems, Engineering controlled terms: Application programming interfaces (API), Fine-grained parallelism, Hardware, Hardware implementations, Modern applications, Multi-core cluster, Parallel architectures, Programming abstractions, Runtime environments, Semantics Cluster-based architecture, Synthetic benchmark Engineering main heading: Scheduling}, pubstate = {published}, tppubtype = {inproceedings} } Modern designs for embedded systems are increasingly embracing cluster-based architectures, where small sets of cores communicate through tightly-coupled shared memory banks and high-performance interconnections. At the same time, the complexity of modern applications requires new programming abstractions to exploit dynamic and/or irregular parallelism on such platforms. Supporting dynamic parallelism in systems which i) are resource-constrained and ii) run applications with small units of work calls for a runtime environment which has minimal overhead for the scheduling of parallel tasks. 
In this work, we study the major sources of overhead in the implementation of OpenMP dynamic loops, sections and tasks, and propose a hardware implementation of a generic Scheduling Engine (HWSE) which fits the semantics of the three constructs. The HWSE is designed as a tightly-coupled block to the PEs within a multi-core cluster, communicating through a shared-memory interface. This allows very fast programming and synchronization with the controlling PEs, fundamental to achieving fast dynamic scheduling, and ultimately to enable fine-grained parallelism. We prove the effectiveness of our solutions with real applications and synthetic benchmarks, using a cycle-accurate virtual platform. |
Burgio, Paolo; Marongiu, Andrea; Coussy, Philippe; Benini, Luca A HLS-Based Toolflow to Design Next-Generation Heterogeneous Many-Core Platforms with Shared Memory Inproceedings Proceedings - 2014 International Conference on Embedded and Ubiquitous Computing, EUC 2014, pp. 130–137, Institute of Electrical and Electronics Engineers Inc., 2014, ISBN: 978-0-7695-5249-1. Abstract | Links | BibTeX | Tags: CLUSTERED ARCHITECTURES, Communication, Computer Networks and Communications, design space exploration, Embedded systems, heterogeneous architectures, hls, Human-Computer Interaction, many-core systems, OPENMP, shared-memory systems, Software @inproceedings{burgio_hls-based_2014, title = {A HLS-Based Toolflow to Design Next-Generation Heterogeneous Many-Core Platforms with Shared Memory}, author = {Paolo Burgio and Andrea Marongiu and Philippe Coussy and Luca Benini}, doi = {10.1109/EUC.2014.27}, isbn = {978-0-7695-5249-1}, year = {2014}, date = {2014-01-01}, booktitle = {Proceedings - 2014 International Conference on Embedded and Ubiquitous Computing, EUC 2014}, pages = {130--137}, publisher = {Institute of Electrical and Electronics Engineers Inc.}, abstract = {This work describes how we use High-Level Synthesis to support design space exploration (DSE) of heterogeneous many-core systems. Modern embedded systems increasingly couple hardware accelerators and processing cores on the same chip, to trade specialization of the platform to an application domain for increased performance and energy efficiency. However, the process of designing such a platform is complex and error-prone, and requires skills on algorithmic aspects, hardware synthesis, and software engineering. DSE can partially be automated, and thus simplified, by coupling the use of HLS tools and virtual prototyping platforms. 
In this paper we enable the design space exploration of heterogeneous many-cores adopting a shared-memory architecture template, where communication and synchronization between the hardware accelerators and the cores happens through L1 shared memory. This communication infrastructure leverages a "zero-copy" scheme, which simplifies both the design process of the platform and the development of applications on top of it. Moreover, the shared-memory template perfectly fits the semantics of several high-level programming models, such as OpenMP. We provide programmers with simple yet powerful abstractions to exploit accelerators from within an OpenMP application, and propose a low-cost implementation of the necessary runtime support. An HLS-based automatic design flow is set up, to quickly explore the design space using a cycle-accurate virtual platform.}, keywords = {CLUSTERED ARCHITECTURES, Communication, Computer Networks and Communications, design space exploration, Embedded systems, heterogeneous architectures, hls, Human-Computer Interaction, many-core systems, OPENMP, shared-memory systems, Software}, pubstate = {published}, tppubtype = {inproceedings} } This work describes how we use High-Level Synthesis to support design space exploration (DSE) of heterogeneous many-core systems. Modern embedded systems increasingly couple hardware accelerators and processing cores on the same chip, to trade specialization of the platform to an application domain for increased performance and energy efficiency. However, the process of designing such a platform is complex and error-prone, and requires skills on algorithmic aspects, hardware synthesis, and software engineering. DSE can partially be automated, and thus simplified, by coupling the use of HLS tools and virtual prototyping platforms. 
In this paper we enable the design space exploration of heterogeneous many-cores adopting a shared-memory architecture template, where communication and synchronization between the hardware accelerators and the cores happens through L1 shared memory. This communication infrastructure leverages a "zero-copy" scheme, which simplifies both the design process of the platform and the development of applications on top of it. Moreover, the shared-memory template perfectly fits the semantics of several high-level programming models, such as OpenMP. We provide programmers with simple yet powerful abstractions to exploit accelerators from within an OpenMP application, and propose a low-cost implementation of the necessary runtime support. An HLS-based automatic design flow is set up, to quickly explore the design space using a cycle-accurate virtual platform. |
2013 |
Capodieci, Nicola; Alsina, EMANUEL FEDERICO; Cabri, Giacomo Context-awareness in the deregulated electric energy market: an agent-based approach Journal Article CONCURRENCY AND COMPUTATION, 27 , pp. 1513–1524, 2013. Abstract | Links | BibTeX | Tags: adaptation model, intelligent agent, renewable energy, Smart Grids @article{capodieci_context-awareness_2013, title = {Context-awareness in the deregulated electric energy market: an agent-based approach}, author = {Nicola Capodieci and EMANUEL FEDERICO Alsina and Giacomo Cabri}, url = {http://onlinelibrary.wiley.com/doi/10.1002/cpe.3011/abstract}, doi = {10.1002/cpe.3011}, year = {2013}, date = {2013-01-01}, journal = {CONCURRENCY AND COMPUTATION}, volume = {27}, pages = {1513--1524}, abstract = {Multiagent systems are commonly used for simulation of new paradigms of energy distribution. Especially when considering Smart Grids, the autonomicity deployed by goal-driven agents implies the need for being aware of multiple aspects connected to the energy distribution context. With ‘context’, we refer to the outside world variables such as weather, stock market trends, location of the users, government actions, and so on; therefore, an architecture highly context-aware is needed. We propose a model in which every important factor concerning the electric energy distribution is presented by modeling context-aware agents able to identify the impact of these factors. 
Moreover, some tests have been performed regarding the web service integration in which agents contracting energy will automatically retrieve data to be used in adaptive and collaborative aspects; an explicative example is represented by the retrieval of weather forecasting that provides input on ongoing demand and data for the predicted availability (in case of photovoltaic or wind powered environments).}, keywords = {adaptation model, intelligent agent, renewable energy, Smart Grids}, pubstate = {published}, tppubtype = {article} } Multiagent systems are commonly used for simulation of new paradigms of energy distribution. Especially when considering Smart Grids, the autonomicity deployed by goal-driven agents implies the need for being aware of multiple aspects connected to the energy distribution context. With ‘context’, we refer to the outside world variables such as weather, stock market trends, location of the users, government actions, and so on; therefore, an architecture highly context-aware is needed. We propose a model in which every important factor concerning the electric energy distribution is presented by modeling context-aware agents able to identify the impact of these factors. Moreover, some tests have been performed regarding the web service integration in which agents contracting energy will automatically retrieve data to be used in adaptive and collaborative aspects; an explicative example is represented by the retrieval of weather forecasting that provides input on ongoing demand and data for the predicted availability (in case of photovoltaic or wind powered environments). |
Giacomo, Cabri; Nicola, Capodieci Applying Multi-armed Bandit Strategies to Change of Collaboration Patterns at Runtime Inproceedings Proceedings of the First International Conference on Artificial Intelligence, Modelling and Simulation, pp. 151–156, IEEE Computer Society, Conference Publishing Service, Los Alamitos, California USA, 2013, ISBN: 978-1-4799-3251-1. Abstract | Links | BibTeX | Tags: Autonomic computing, collaboration, Self-expression @inproceedings{giacomo_applying_2013, title = {Applying Multi-armed Bandit Strategies to Change of Collaboration Patterns at Runtime}, author = {Cabri Giacomo and Capodieci Nicola}, doi = {10.1109/AIMS.2013.31}, isbn = {978-1-4799-3251-1}, year = {2013}, date = {2013-01-01}, booktitle = {Proceedings of the First International Conference on Artificial Intelligence, Modelling and Simulation}, pages = {151--156}, publisher = {IEEE Computer Society, Conference Publishing Service}, address = {Los Alamitos, California USA}, abstract = {Autonomic systems have some interesting properties that enable them to self-manage at runtime. Traditionally, they are Self-configuration, Self-healing, Self-optimization, and Self-protection. Another interesting property is Self-expression, i.e. the capability of reconfiguring the collaboration pattern of a system at runtime and in an autonomous way. This re-factoring of behaviours, roles and interactions takes place whenever the environment in which the set of components is inserted experiences some changes, so that in order to adapt to the ever variable external conditions, the system is still able to maximize its utilities. In the past we have motivated the need for such a pattern change, and have proposed some approaches to enact it in situations in which we can associate to each external condition the fittest coordination pattern. 
In this paper we propose a strategy to decide the change and to choose the next collaboration pattern in situations in which the system components do not have sufficient knowledge and cognition to reason about the varying external conditions. Our approach is based on the algorithms proposed to solve the multi-armed bandit problem, in which a player must choose which slot machine to pull, given a number of them, in order to maximize the reward.}, keywords = {Autonomic computing, collaboration, Self-expression}, pubstate = {published}, tppubtype = {inproceedings} } Autonomic systems have some interesting properties that enable them to self-manage at runtime. Traditionally, they are Self-configuration, Self-healing, Self-optimization, and Self-protection. Another interesting property is Self-expression, i.e. the capability of reconfiguring the collaboration pattern of a system at runtime and in an autonomous way. This re-factoring of behaviours, roles and interactions takes place whenever the environment in which the set of components is inserted experiences some changes, so that in order to adapt to the ever variable external conditions, the system is still able to maximize its utilities. In the past we have motivated the need for such a pattern change, and have proposed some approaches to enact it in situations in which we can associate to each external condition the fittest coordination pattern. In this paper we propose a strategy to decide the change and to choose the next collaboration pattern in situations in which the system components do not have sufficient knowledge and cognition to reason about the varying external conditions. Our approach is based on the algorithms proposed to solve the multi-armed bandit problem, in which a player must choose which slot machine to pull, given a number of them, in order to maximize the reward. |
Capodieci, N; Hart, E; Cabri, G Designing Self-Aware Adaptive Systems: from Autonomic Computing to Cognitive Immune Networks Inproceedings Proceedings of the 7th IEEE International Conference on Self-Adaptation and Self-Organizing Systems Workshops, SASOW 2013, pp. 59–64, IEEE Computer Society, Conference Publishing Service, Los Alamitos, California USA, 2013, ISBN: 978-1-4799-5086-7. Abstract | BibTeX | Tags: Artificial Immune Systems, Autonomic computing, Self-adaptive Systems @inproceedings{capodieci_designing_2013, title = {Designing Self-Aware Adaptive Systems: from Autonomic Computing to Cognitive Immune Networks}, author = {N Capodieci and E Hart and G Cabri}, isbn = {978-1-4799-5086-7}, year = {2013}, date = {2013-01-01}, booktitle = {Proceedings of the 7th IEEE International Conference on Self-Adaptation and Self-Organizing Systems Workshops, SASOW 2013}, pages = {59--64}, publisher = {IEEE Computer Society, Conference Publishing Service}, address = {Los Alamitos, California USA}, abstract = {An autonomic system is composed of ensembles of heterogeneous autonomic components in which large sets of components are dynamically added and removed. Nodes within such an ensemble should cooperate to achieve system or human goals, and systems are expected to self-adapt with little or no human interaction. Designing such systems poses significant challenges. In this paper we propose that the system engineer might gain significant inspiration by looking to the biological immune system, particularly by adopting a perspective on the immune system proposed by Cohen known as the Cognitive Immune Network. The goal of this paper is to show how the current literature in autonomic computing could be positively enriched by considering alternative design processes based on cognitive immune networks. 
After sketching out the mapping in commonalities between the Cognitive Immune Network and the autonomic computing reference model, we demonstrate how these considerations regarding the design process can be exploited with an engineered autonomic system by describing experiments with a simple robotic swarm scenario.}, keywords = {Artificial Immune Systems, Autonomic computing, Self-adaptive Systems}, pubstate = {published}, tppubtype = {inproceedings} } An autonomic system is composed of ensembles of heterogeneous autonomic components in which large sets of components are dynamically added and removed. Nodes within such an ensemble should cooperate to achieve system or human goals, and systems are expected to self-adapt with little or no human-interaction. Designing such systems poses significant challenges. In this paper we propose that the system engineer might gain significant inspiration by looking to the biological immune system, particularly by adopting a perspective on the immune system proposed by Cohen known as the Cognitive Immune Network. The goal of this paper is to show how the current literature in autonomic computing could be positively enriched by considering alternative design processes based on cognitive immune networks. After sketching out the mapping in commonalities between the Cognitive Immune Network and the autonomic computing reference model, we demonstrate how these considerations regarding the design process can be exploited with an engineered autonomic system by describing experiments with a simple robotic swarm scenario. |
Capodieci,; Cabri, Collaboration in Swarm Robotics: a Visual Communication Approach Inproceedings Proceedings of the 2013 International Conference on Collaboration Technologies and Systems (CTS), pp. 195–202, IEEE Computer Society, Conference Publishing Service, Los Alamitos, California, 2013, ISBN: 978-1-4673-6403-4. Abstract | BibTeX | Tags: Adaptive collaboration, swarm robots @inproceedings{capodieci_collaboration_2013, title = {Collaboration in Swarm Robotics: a Visual Communication Approach}, author = {N Capodieci and G Cabri}, isbn = {978-1-4673-6403-4}, year = {2013}, date = {2013-01-01}, booktitle = {Proceedings of the 2013 International Conference on Collaboration Technologies and Systems (CTS)}, pages = {195--202}, publisher = {IEEE Computer Society, Conference Publishing Service}, address = {Los Alamitos, California}, abstract = {Swarm robotics involves a number of simple robots that have a common task and carry it out in a collective way. In this context the collaboration among the different components is a crucial aspect, but not always it can be enacted in a direct way, i.e., by means of direct communication between components. This is because of the simplicity of the robots, but this turns out to increase robustness and flexibility of the swarm systems. In this paper we present a case study in which some robots are in charge of sweeping the perimeter of an area, and propose a distributed algorithm for the task division. We have tested the proposed algorithm in different situations; we report the results and we also compare them to the ones achieved with no collaboration.}, keywords = {Adaptive collaboration, swarm robots}, pubstate = {published}, tppubtype = {inproceedings} } Swarm robotics involves a number of simple robots that have a common task and carry it out in a collective way. 
In this context the collaboration among the different components is a crucial aspect, but not always it can be enacted in a direct way, i.e., by means of direct communication between components. This is because of the simplicity of the robots, but this turns out to increase robustness and flexibility of the swarm systems. In this paper we present a case study in which some robots are in charge of sweeping the perimeter of an area, and propose a distributed algorithm for the task division. We have tested the proposed algorithm in different situations; we report the results and we also compare them to the ones achieved with no collaboration. |
Capodieci, Nicola; Hart, Emma; Cabri, Giacomo An immune network approach for self-adaptive ensembles of autonomic components: a case study in swarm robotics Inproceedings Proceedings of the Twelfth European Conference on the Synthesis and Simulation of Living Systems, pp. 864–871, MIT Press, Cambridge, MA, 2013, ISBN: 978-0-262-31709-2. Abstract | BibTeX | Tags: Autonomic computing, self-adaptive, swarm robotics @inproceedings{capodieci_immune_2013, title = {An immune network approach for self-adaptive ensembles of autonomic components: a case study in swarm robotics}, author = {Nicola Capodieci and Emma Hart and Giacomo Cabri}, isbn = {978-0-262-31709-2}, year = {2013}, date = {2013-01-01}, booktitle = {Proceedings of the Twelfth European Conference on the Synthesis and Simulation of Living Systems}, pages = {864--871}, publisher = {MIT Press}, address = {Cambridge, MA}, abstract = {We describe an immune inspired approach to achieve self-expression within an ensemble, i.e. enabling an ensemble of autonomic components to dynamically change their coordination pattern during the runtime execution of a given task. Building on previous work using idiotypic networks, we consider robotic swarms in which each robot has a lymph node containing a set of antibodies describing conditions under which different coordination patterns can be applied. Antibodies are shared between robots that come into communication range facilitating collaboration. Tests in simulation in robotic arenas of varying complexity show that the swarm is able to learn suitable patterns and effectively achieve a foraging task, particularly in arenas of high complexity.}, keywords = {Autonomic computing, self-adaptive, swarm robotics}, pubstate = {published}, tppubtype = {inproceedings} } We describe an immune inspired approach to achieve self-expression within an ensemble, i.e. 
enabling an ensemble of autonomic components to dynamically change their coordination pattern during the runtime execution of a given task. Building on previous work using idiotypic networks, we consider robotic swarms in which each robot has a lymph node containing a set of antibodies describing conditions under which different coordination patterns can be applied. Antibodies are shared between robots that come into communication range facilitating collaboration. Tests in simulation in robotic arenas of varying complexity show that the swarm is able to learn suitable patterns and effectively achieve a foraging task, particularly in arenas of high complexity. |
Capodieci,; Cabri, Managing Deregulated Energy Markets: an Adaptive and Autonomous Multi-Agent System Application Inproceedings Proceedings of the 2013 IEEE International Conference on Systems, Man, and Cybernetics, pp. 758–763, IEEE Computer Society, Conference Publishing Service, Los Alamitos, California USA, 2013, ISBN: 978-0-7695-5154-8. Abstract | Links | BibTeX | Tags: adaptive strategy, Energy market, Software agents @inproceedings{capodieci_managing_2013, title = {Managing Deregulated Energy Markets: an Adaptive and Autonomous Multi-Agent System Application}, author = {N Capodieci and G Cabri}, doi = {10.1109/SMC.2013.134}, isbn = {978-0-7695-5154-8}, year = {2013}, date = {2013-01-01}, booktitle = {Proceedings of the 2013 IEEE International Conference on Systems, Man, and Cybernetics}, pages = {758--763}, publisher = {IEEE Computer Society, Conference Publishing Service}, address = {Los Alamitos, California USA}, abstract = {Given the complexity of modelling actors and interactions of the deregulated electric energy market, the Multi-Agent System approach turns out to be suitable for both simulation and application of critical aspects in the Smart Grid. In particular, for balancing demand and offer and for handling negotiation among peers: now, even a domestic environment that features photovoltaic and/or wind turbines modules can decide to enter the deregulated market as a small-scale seller, thus making the requirement of having such an architecture to be autonomous by deploying Self-* properties such as Self-Organization, Self- Repairing, Self-Adaptation. To be more specific about the presented case study, we propose a model in which small-scale seller agents dynamically and autonomously decide either to address the market as lone operators or by aggregating into Virtual Power Plants, from time to time in order to adapt to different situations. 
This iterated decisional process depends on highly variable market related factors, thus our goal is to design a net of agents able to autonomously react to this dynamic environment.}, keywords = {adaptive strategy, Energy market, Software agents}, pubstate = {published}, tppubtype = {inproceedings} } Given the complexity of modelling actors and interactions of the deregulated electric energy market, the Multi-Agent System approach turns out to be suitable for both simulation and application of critical aspects in the Smart Grid. In particular, for balancing demand and offer and for handling negotiation among peers: now, even a domestic environment that features photovoltaic and/or wind turbines modules can decide to enter the deregulated market as a small-scale seller, thus making the requirement of having such an architecture to be autonomous by deploying Self-* properties such as Self-Organization, Self- Repairing, Self-Adaptation. To be more specific about the presented case study, we propose a model in which small-scale seller agents dynamically and autonomously decide either to address the market as lone operators or by aggregating into Virtual Power Plants, from time to time in order to adapt to different situations. This iterated decisional process depends on highly variable market related factors, thus our goal is to design a net of agents able to autonomously react to this dynamic environment. |
Cabri,; Capodieci, Runtime Change of Collaboration Patterns in Autonomic Systems: Motivations and Perspectives Inproceedings Proceedings of the 2013 IEEE 27th International Conference on Advanced Information Networking and Applications Workshops, pp. 1038–1043, IEEE Conference Publishing Services, IEEE Computer Society, Los Alamitos, 2013, ISBN: 978-1-4673-6239-9. Abstract | Links | BibTeX | Tags: Autonomic computing, collaborative pattern @inproceedings{cabri_runtime_2013, title = {Runtime Change of Collaboration Patterns in Autonomic Systems: Motivations and Perspectives}, author = {G Cabri and N Capodieci}, doi = {10.1109/WAINA.2013.82}, isbn = {978-1-4673-6239-9}, year = {2013}, date = {2013-01-01}, booktitle = {Proceedings of the 2013 IEEE 27th International Conference on Advanced Information Networking and Applications Workshops}, pages = {1038--1043}, publisher = {IEEE Conference Publishing Services, IEEE Computer Society}, address = {Los Alamitos}, abstract = {Today’s distributed systems are more and more complex, so they are required to be autonomic, i.e., to exhibit some self-* properties in order to manage themselves. Autonomic systems are usually composed of different components, which collaborate to achieve a global goal or to provide a high-level service. The collaboration pattern is usually defined statically, but the aim of this paper is to show that there are motivations to enable composed systems to change their collaboration pattern at runtime in an autonomous way, starting from some case studies. This capability of autonomic systems is called self-expression, meaning that a system can express itself despite unexpected situations. 
Moreover, we propose three perspective solutions that aim at enabling the change at runtime: role-based, description-based, and Artificial Immune Systems (AIS)-inspired.}, keywords = {Autonomic computing, collaborative pattern}, pubstate = {published}, tppubtype = {inproceedings} } Today’s distributed systems are more and more complex, so they are required to be autonomic, i.e., to exhibit some self-* properties in order to manage themselves. Autonomic systems are usually composed of different components, which collaborate to achieve a global goal or to provide a high-level service. The collaboration pattern is usually defined statically, but the aim of this paper is to show that there are motivations to enable composed systems to change their collaboration pattern at runtime in an autonomous way, starting from some case studies. This capability of autonomic systems is called self-expression, meaning that a system can express itself despite unexpected situations. Moreover, we propose three perspective solutions that aim at enabling the change at runtime: role-based, description-based, and Artificial Immune Systems (AIS)-inspired. |
Zanella,; Zanghirati,; Cavicchioli,; Zanni,; Boccacci,; Bertero,; Vicidomini, Towards real-time image deconvolution: application to confocal and STED microscopy Journal Article SCIENTIFIC REPORTS, 3 , pp. 2523–2523, 2013. Abstract | Links | BibTeX | Tags: confocal laser scanning microscopy (CLSM), fluorescence microscopy, graphics processing unit (GPU)., image deconvolution, Stimulated emission depletion (STED) microscopy @article{zanella_towards_2013, title = {Towards real-time image deconvolution: application to confocal and STED microscopy}, author = {R Zanella and G Zanghirati and R Cavicchioli and L Zanni and P Boccacci and M Bertero and G Vicidomini}, doi = {10.1038/srep02523}, year = {2013}, date = {2013-01-01}, journal = {SCIENTIFIC REPORTS}, volume = {3}, pages = {2523--2523}, abstract = {Although deconvolution can improve the quality of any type of microscope, the high computational time required has so far limited its massive spreading. Here we demonstrate the ability of the scaled-gradient-projection (SGP) method to provide accelerated versions of the most used algorithms in microscopy. To achieve further increases in efficiency, we also consider implementations on graphic processing units (GPUs). We test the proposed algorithms both on synthetic and real data of confocal and STED microscopy. Combining the SGP method with the GPU implementation we achieve a speed-up factor from about a factor 25 to 690 (with respect the conventional algorithm). The excellent results obtained on STED microscopy images demonstrate the synergy between super-resolution techniques and image-deconvolution. 
Further, the real-time processing allows conserving one of the most important properties of STED microscopy, i.e. the ability to provide fast sub-diffraction resolution recordings.}, keywords = {confocal laser scanning microscopy (CLSM), fluorescence microscopy, graphics processing unit (GPU)., image deconvolution, Stimulated emission depletion (STED) microscopy}, pubstate = {published}, tppubtype = {article} } Although deconvolution can improve the quality of any type of microscope, the high computational time required has so far limited its massive spreading. Here we demonstrate the ability of the scaled-gradient-projection (SGP) method to provide accelerated versions of the most used algorithms in microscopy. To achieve further increases in efficiency, we also consider implementations on graphic processing units (GPUs). We test the proposed algorithms both on synthetic and real data of confocal and STED microscopy. Combining the SGP method with the GPU implementation we achieve a speed-up factor from about a factor 25 to 690 (with respect the conventional algorithm). The excellent results obtained on STED microscopy images demonstrate the synergy between super-resolution techniques and image-deconvolution. Further, the real-time processing allows conserving one of the most important properties of STED microscopy, i.e. the ability to provide fast sub-diffraction resolution recordings. |
Cavicchioli,; Chaux,; Blanc-Feraud,; Zanni, ML estimation of wavelet regularization hyperparameters in inverse problems Inproceedings Proc. of IEEE Int. Conf. on Acoustics, Speech and Signal Processing (ICASSP 2013), pp. 1553–1557, IEEE, 2013, ISBN: 978-1-4799-0356-6. Abstract | BibTeX | Tags: gradient projection methods, Inverse problems, wavelet transform @inproceedings{cavicchioli_ml_2013, title = {ML estimation of wavelet regularization hyperparameters in inverse problems}, author = {R Cavicchioli and C Chaux and L Blanc-Feraud and L Zanni}, isbn = {978-1-4799-0356-6}, year = {2013}, date = {2013-01-01}, booktitle = {Proc. of IEEE Int. Conf. on Acoustics, Speech and Signal Processing (ICASSP 2013)}, pages = {1553--1557}, publisher = {IEEE}, abstract = {In this paper we are interested in regularizing hyperparameter estimation by maximum likelihood in inverse problems with wavelet regularization. One parameter per subband will be estimated by gradient ascent algorithm. We have to face with two main difficulties: i) sampling the a posteriori image distribution to compute the gradient; ii) choosing a suited step-size to ensure good convergence properties. We first show that introducing an auxiliary variable makes the sampling feasible using classical Metropolis-Hastings algorithm and Gibbs sampler. Secondly, we propose an adaptive step-size selection and a line-search strategy to improve the gradient-based method. Good performances of the proposed approach are demonstrated on both synthetic and real data.}, keywords = {gradient projection methods, Inverse problems, wavelet transform}, pubstate = {published}, tppubtype = {inproceedings} } In this paper we are interested in regularizing hyperparameter estimation by maximum likelihood in inverse problems with wavelet regularization. One parameter per subband will be estimated by gradient ascent algorithm. 
We have to face with two main difficulties: i) sampling the a posteriori image distribution to compute the gradient; ii) choosing a suited step-size to ensure good convergence properties. We first show that introducing an auxiliary variable makes the sampling feasible using classical Metropolis-Hastings algorithm and Gibbs sampler. Secondly, we propose an adaptive step-size selection and a line-search strategy to improve the gradient-based method. Good performances of the proposed approach are demonstrated on both synthetic and real data. |
Rahimi, Abbas; Marongiu, Andrea; Burgio, Paolo; Gupta, Rajesh; Benini, Luca Variation-tolerant OpenMP Tasking on Tightly-coupled Processor Clusters Inproceedings Design, Automation & Test in Europe Conference & Exhibition (DATE), 2013, pp. 541–546, 2013 IEEE Conference Proceedings, 2013, ISBN: 978-1-4673-5071-6. Abstract | Links | BibTeX | Tags: Dynamic variations, Hardware features, Hardware/software co-design, Hardware/Software interfaces, Instruction per cycles, processor clusters, Programming models @inproceedings{rahimi_variation-tolerant_2013, title = {Variation-tolerant OpenMP Tasking on Tightly-coupled Processor Clusters}, author = {Abbas Rahimi and Andrea Marongiu and Paolo Burgio and Rajesh K Gupta and Luca Benini}, doi = {10.7873/DATE.2013.121}, isbn = {978-1-4673-5071-6}, year = {2013}, date = {2013-01-01}, booktitle = {Design, Automation & Test in Europe Conference & Exhibition (DATE), 2013}, pages = {541--546}, publisher = {2013 IEEE Conference Proceedings}, abstract = {We present a variation-tolerant tasking technique for tightly-coupled shared memory processor clusters that relies upon modeling advance across the hardware/software interface. This is implemented as an extension to the OpenMP 3.0 tasking programming model. Using the notion of Task-Level Vulnerability (TLV) proposed here, we capture dynamic variations caused by circuit-level variability as a high-level software knowledge. This is accomplished through a variation-aware hardware/software codesign where: (i) Hardware features variability monitors in conjunction with online per-core characterization of TLV metadata; (ii) Software supports a Task-level Errant Instruction Management (TEIM) technique to utilize TLV metadata in the runtime OpenMP task scheduler. This method greatly reduces the number of recovery cycles compared to the baseline scheduler of OpenMP [22], consequently instruction per cycle (IPC) of a 16-core processor cluster is increased up to 1.51× (1.17× on average). 
We evaluate the effectiveness of our approach with various number of cores (4,8,12,16), and across a wide temperature range(ΔT=90°C).}, keywords = {Dynamic variations, Hardware features, Hardware/software co-design, Hardware/Software interfaces, Instruction per cycles, processor clusters, Programming models}, pubstate = {published}, tppubtype = {inproceedings} } We present a variation-tolerant tasking technique for tightly-coupled shared memory processor clusters that relies upon modeling advance across the hardware/software interface. This is implemented as an extension to the OpenMP 3.0 tasking programming model. Using the notion of Task-Level Vulnerability (TLV) proposed here, we capture dynamic variations caused by circuit-level variability as a high-level software knowledge. This is accomplished through a variation-aware hardware/software codesign where: (i) Hardware features variability monitors in conjunction with online per-core characterization of TLV metadata; (ii) Software supports a Task-level Errant Instruction Management (TEIM) technique to utilize TLV metadata in the runtime OpenMP task scheduler. This method greatly reduces the number of recovery cycles compared to the baseline scheduler of OpenMP [22], consequently instruction per cycle (IPC) of a 16-core processor cluster is increased up to 1.51× (1.17× on average). We evaluate the effectiveness of our approach with various number of cores (4,8,12,16), and across a wide temperature range(ΔT=90°C). |
Burgio,; Marongiu,; Danilo,; Coussy,; Benini, Architecture and programming model support for efficient heterogeneous computing on tightly-coupled shared-memory clusters Inproceedings Design and Architectures for Signal and Image Processing (DASIP), 2013 Conference on, pp. 22–29, IEEE 2013, 2013, ISBN: 979-10-92279-01-6. Abstract | BibTeX | Tags: Acceleration capabilities, Application development, Better performance, Computational kernels, Hardware acceleration, Heterogeneous clusters @inproceedings{burgio_architecture_2013, title = {Architecture and programming model support for efficient heterogeneous computing on tightly-coupled shared-memory clusters}, author = {P Burgio and A Marongiu and R Danilo and P Coussy and L Benini}, isbn = {979-10-92279-01-6}, year = {2013}, date = {2013-01-01}, booktitle = {Design and Architectures for Signal and Image Processing (DASIP), 2013 Conference on}, pages = {22--29}, publisher = {IEEE 2013}, abstract = {Modern computer vision and image processing embedded systems exploit hardware acceleration inside scalable parallel architectures, such as tightly-coupled clusters, to achieve stringent performance and energy efficiency targets. Architectural heterogeneity typically makes software development cumbersome, thus shared memory processor-to-accelerator communication is typically preferred to simplify code offloading to HW IPs for critical computational kernels. However, tightly coupling a large number of accelerators and processors in a shared memory cluster is a challenging task, since the complexity of the resulting system quickly becomes too large. We tackle these issues by proposing a template of heterogeneous shared memory cluster which scales to a large number of accelerators, achieving up to 40% better performance/area/watt than simply designing larger main interconnects to accommodate several HW IPs. 
In addition, following a trend towards standardization of acceleration capabilities of future embedded systems, we develop a programming model which simplifies application development for heterogeneous clusters.}, keywords = {Acceleration capabilities, Application development, Better performance, Computational kernels, Hardware acceleration, Heterogeneous clusters}, pubstate = {published}, tppubtype = {inproceedings} } Modern computer vision and image processing embedded systems exploit hardware acceleration inside scalable parallel architectures, such as tightly-coupled clusters, to achieve stringent performance and energy efficiency targets. Architectural heterogeneity typically makes software development cumbersome, thus shared memory processor-to-accelerator communication is typically preferred to simplify code offloading to HW IPs for critical computational kernels. However, tightly coupling a large number of accelerators and processors in a shared memory cluster is a challenging task, since the complexity of the resulting system quickly becomes too large. We tackle these issues by proposing a template of heterogeneous shared memory cluster which scales to a large number of accelerators, achieving up to 40% better performance/area/watt than simply designing larger main interconnects to accommodate several HW IPs. In addition, following a trend towards standardization of acceleration capabilities of future embedded systems, we develop a programming model which simplifies application development for heterogeneous clusters. |
Burgio, Paolo; Tagliavini, Giuseppe; Marongiu, Andrea; Benini, Luca Enabling Fine-Grained OpenMP Tasking on Tightly-Coupled Shared Memory Clusters Inproceedings Design, Automation & Test in Europe Conference & Exhibition (DATE), 2013, pp. 1504–1509, 2013 IEEE Conference Proceedings, 2013, ISBN: 978-1-4673-5071-6. Abstract | Links | BibTeX | Tags: Cluster-based architectures, OpenMP tasking, shared memory cluster @inproceedings{burgio_enabling_2013, title = {Enabling Fine-Grained OpenMP Tasking on Tightly-Coupled Shared Memory Clusters}, author = {Paolo Burgio and Giuseppe Tagliavini and Andrea Marongiu and Luca Benini}, doi = {10.7873/DATE.2013.306}, isbn = {978-1-4673-5071-6}, year = {2013}, date = {2013-01-01}, booktitle = {Design, Automation & Test in Europe Conference & Exhibition (DATE), 2013}, pages = {1504--1509}, publisher = {2013 IEEE Conference Proceedings}, abstract = {Cluster-based architectures are increasingly being adopted to design embedded many-cores. These platforms can deliver very high peak performance within a contained power envelope, provided that programmers can make effective use the available parallel cores. This is becoming an extremely difficult task, as embedded applications are growing in complexity and exhibit irregular and dynamic parallelism. The OpenMP tasking extensions represent a powerful abstraction to capture this form of parallelism. However, efficiently supporting it on cluster-based embedded SoCs is not easy, because the fine-grained parallel workload present in embedded applications can not tolerate high memory and run-time overheads. 
In this paper we present our design of the runtime support layer to OpenMP tasking for an embedded shared memory cluster, identifying key aspects to achieving performance and discussing important architectural support to removing major bottlenecks.}, keywords = {Cluster-based architectures, OpenMP tasking, shared memory cluster}, pubstate = {published}, tppubtype = {inproceedings} } Cluster-based architectures are increasingly being adopted to design embedded many-cores. These platforms can deliver very high peak performance within a contained power envelope, provided that programmers can make effective use the available parallel cores. This is becoming an extremely difficult task, as embedded applications are growing in complexity and exhibit irregular and dynamic parallelism. The OpenMP tasking extensions represent a powerful abstraction to capture this form of parallelism. However, efficiently supporting it on cluster-based embedded SoCs is not easy, because the fine-grained parallel workload present in embedded applications can not tolerate high memory and run-time overheads. In this paper we present our design of the runtime support layer to OpenMP tasking for an embedded shared memory cluster, identifying key aspects to achieving performance and discussing important architectural support to removing major bottlenecks. |
2012 |
Capodieci,; Cabri,; Pagani,; Aiello, Adaptive Game-based Agent Negotiation in Deregulated Energy Markets Inproceedings Proceedings of the 2012 International Conference on Collaboration Technologies and Systems, pp. 300–307, IEEE Computer Society Press, Piscataway, NJ, 2012, ISBN: 978-1-4673-1381-0. Abstract | BibTeX | Tags: Energy market, Game theory, Software agents @inproceedings{capodieci_adaptive_2012, title = {Adaptive Game-based Agent Negotiation in Deregulated Energy Markets}, author = {N Capodieci and G Cabri and G A Pagani and M Aiello}, isbn = {978-1-4673-1381-0}, year = {2012}, date = {2012-01-01}, booktitle = {Proceedings of the 2012 International Conference on Collaboration Technologies and Systems}, pages = {300--307}, publisher = {IEEE Computer Society Press}, address = {Piscataway, NJ}, abstract = {In the emerging deregulated energy paradigm enabled by the Smart Grid, energy provisioning will change drastically. Energy contracts will be negotiated between a potential multitude of parties at high frequency (e.g., several times per day) based on local needs and micro-generation production facilities. In this context, this paper presents an agent-based approach to manage negotiation among the different parties.The goal of the presented work is to propose adaptive negotiation strategies for trading energy in a deregulated market. In particular, we provide strategies derived from game theory, in order to optimize energy production and supply costs by means of negotiation and adaptation. The novelty lies in the adaptation of the class of minority and stochastic games to the energy trading problem in order to model the strategy of the various parties involved. 
The paper presents also simulation results of a scenario with a large number of energy buyers, a small set of prosumers (energy consumers and producers using renewable micro-generation facilities) and a few large-scale traditional electricity suppliers.}, keywords = {Energy market, Game theory, Software agents}, pubstate = {published}, tppubtype = {inproceedings} } In the emerging deregulated energy paradigm enabled by the Smart Grid, energy provisioning will change drastically. Energy contracts will be negotiated between a potential multitude of parties at high frequency (e.g., several times per day) based on local needs and micro-generation production facilities. In this context, this paper presents an agent-based approach to manage negotiation among the different parties.The goal of the presented work is to propose adaptive negotiation strategies for trading energy in a deregulated market. In particular, we provide strategies derived from game theory, in order to optimize energy production and supply costs by means of negotiation and adaptation. The novelty lies in the adaptation of the class of minority and stochastic games to the energy trading problem in order to model the strategy of the various parties involved. The paper presents also simulation results of a scenario with a large number of energy buyers, a small set of prosumers (energy consumers and producers using renewable micro-generation facilities) and a few large-scale traditional electricity suppliers. |
Capodieci,; Alsina,; Cabri, A Context-aware Agent-based Approach for Deregulated Energy Market Inproceedings Proceedings of the 21st IEEE International WETICE conference, pp. 16–21, IEEE Computer Society, Conference Publishing Service, Los Alamitos, CA, 2012, ISBN: 978-1-4673-1888-4. Abstract | BibTeX | Tags: Agents, Energy market, Game theory @inproceedings{capodieci_context-aware_2012, title = {A Context-aware Agent-based Approach for Deregulated Energy Market}, author = {N Capodieci and E Alsina and G Cabri}, isbn = {978-1-4673-1888-4}, year = {2012}, date = {2012-01-01}, booktitle = {Proceedings of the 21st IEEE International WETICE conference}, pages = {16--21}, publisher = {IEEE Computer Society, Conference Publishing Service}, address = {Los Alamitos, CA}, abstract = {A deregulated energy market is a typical scenario in which software agents are used for simulation and/or application purposes. Agents act on behalf of end users, thus implying the necessity of being aware of multiple aspects connected to the distribution of electricity. These aspects refer to outside world variables like weather, stock market trends, location of the users etc. therefore an architecture highly context aware is needed. We propose a web service integration in which agents contracting energy will automatically retrieve data to be used in adaptive and collaborative aspects, an explicative example, misrepresented by the retrieval of weather forecasting, that provides input on ongoing demand and data for the predicted availability(in case of photovoltaic or wind powered environments). 
The challenge lies in how to correctly use data coming from different sources, since these information are crucial for user profiling and balancing in the short-term contracts in the Smart Grid.}, keywords = {Agents, Energy market, Game theory}, pubstate = {published}, tppubtype = {inproceedings} } A deregulated energy market is a typical scenario in which software agents are used for simulation and/or application purposes. Agents act on behalf of end users, thus implying the necessity of being aware of multiple aspects connected to the distribution of electricity. These aspects refer to outside world variables like weather, stock market trends, location of the users etc. therefore an architecture highly context aware is needed. We propose a web service integration in which agents contracting energy will automatically retrieve data to be used in adaptive and collaborative aspects, an explicative example, represented by the retrieval of weather forecasting, that provides input on ongoing demand and data for the predicted availability (in case of photovoltaic or wind powered environments). The challenge lies in how to correctly use data coming from different sources, since these information are crucial for user profiling and balancing in the short-term contracts in the Smart Grid. |
Capodieci,; Cabri,; Pagani,; Aiello, An Agent-based Application to Enable Deregulated Energy Markets Inproceedings Proceedings of the 36th Annual IEEE Computer Software and Applications Conference (COMPSAC 2012), pp. 638–647, IEEE Computer Society, Conference Publishing Service, Los Alamitos, California, 2012, ISBN: 978-1-4673-1990-4. Abstract | BibTeX | Tags: Energy, Game theory, Software agents @inproceedings{capodieci_agent-based_2012, title = {An Agent-based Application to Enable Deregulated Energy Markets}, author = {N Capodieci and G Cabri and G A Pagani and M Aiello}, isbn = {978-1-4673-1990-4}, year = {2012}, date = {2012-01-01}, booktitle = {Proceedings of the 36th Annual IEEE Computer Software and Applications Conference (COMPSAC 2012)}, pages = {638--647}, publisher = {IEEE Computer Society, Conference Publishing Service}, address = {Los Alamitos, California}, abstract = {Private houses are more and more enabled with devices that can produce renewable energy, and the not so remote chance of selling the surplus energy makes them new players in the energy market.This market is likely to become deregulated since each energy home-producer can negotiate the energy price with consumers, typically by means of an auction; on the other hand, consumers can always rely on energy companies, even if their energy is more expensive.This scenario could lead to advantages for users, but it is certainly complex and dynamic, and needs an appropriate management.To this purpose, in this paper we propose an agent-based application to deal with the negotiation among different parties producing and consuming energy.Software agents, thanks to their autonomy in taking decisions, well suit the requirements of the proposed scenario.For our application, we adopt a strategy derived from game theory, in order to optimize energy production and supply costs by means of negotiation and learning.The effectiveness of our approach is proved by simulation results of a situation involving 
energy buyers, energy producers using renewable micro-generation facilities and large-scale traditional electricity companies.}, keywords = {Energy, Game theory, Software agents}, pubstate = {published}, tppubtype = {inproceedings} } Private houses are more and more enabled with devices that can produce renewable energy, and the not so remote chance of selling the surplus energy makes them new players in the energy market.This market is likely to become deregulated since each energy home-producer can negotiate the energy price with consumers, typically by means of an auction; on the other hand, consumers can always rely on energy companies, even if their energy is more expensive.This scenario could lead to advantages for users, but it is certainly complex and dynamic, and needs an appropriate management.To this purpose, in this paper we propose an agent-based application to deal with the negotiation among different parties producing and consuming energy.Software agents, thanks to their autonomy in taking decisions, well suit the requirements of the proposed scenario.For our application, we adopt a strategy derived from game theory, in order to optimize energy production and supply costs by means of negotiation and learning.The effectiveness of our approach is proved by simulation results of a situation involving energy buyers, energy producers using renewable micro-generation facilities and large-scale traditional electricity companies. |
Capodieci,; Cabri, Coordination And Task Division In Robot Ensembles: Perimeter Sweep Case Study Inproceedings Proceedings of the 21st IEEE International WETICE conference, pp. 101–103, IEEE Computer Society, Conference Publishing Service, Los Alamitos, CA, 2012, ISBN: 978-1-4673-1888-4. Abstract | BibTeX | Tags: adaptive, autonomic, coordination, ensamble, robot @inproceedings{capodieci_coordination_2012, title = {Coordination And Task Division In Robot Ensembles: Perimeter Sweep Case Study}, author = {N Capodieci and G Cabri}, isbn = {978-1-4673-1888-4}, year = {2012}, date = {2012-01-01}, booktitle = {Proceedings of the 21st IEEE International WETICE conference}, pages = {101--103}, publisher = {IEEE Computer Society, Conference Publishing Service}, address = {Los Alamitos, CA}, abstract = {Coordination and communication in scenarios where a multitude of robots are presents represent very critical aspects when designing robot ensembles. This paper will show how is it possible to coordinate and sub-divide tasks in a homogeneous ensemble of robots, exploiting a wall-following case study in which every bot has to be aware of the presence of other peers, by adapting its choices accordingly and cooperate to improve the overall performances. The global task has to be accomplished with extremely limited communication capabilities and by distributed coordination. Software simulations that model existing robots were performed for helping us to show the efficiency of the proposed approach.}, keywords = {adaptive, autonomic, coordination, ensamble, robot}, pubstate = {published}, tppubtype = {inproceedings} } Coordination and communication in scenarios where a multitude of robots are presents represent very critical aspects when designing robot ensembles. 
This paper will show how is it possible to coordinate and sub-divide tasks in a homogeneous ensemble of robots, exploiting a wall-following case study in which every bot has to be aware of the presence of other peers, by adapting its choices accordingly and cooperate to improve the overall performances. The global task has to be accomplished with extremely limited communication capabilities and by distributed coordination. Software simulations that model existing robots were performed for helping us to show the efficiency of the proposed approach. |
Nicola, Capodieci; Giacomo, Cabri Conceptual Map and Classification in Ensembles of Autonomic Components: from Awareness to Organization Inproceedings Proceedings of the 2012 IEEE Sixth International Conference on Self-Adaptive and Self-Organizing Systems Workshops (SASOW), pp. 127–132, IEEE Computer Society, Conference Publishing Service, Los Alamitos, CA, 2012, ISBN: 978-1-4673-5153-9. Abstract | Links | BibTeX | Tags: Autonomic computing, awareness @inproceedings{nicola_conceptual_2012, title = {Conceptual Map and Classification in Ensembles of Autonomic Components: from Awareness to Organization}, author = {Capodieci Nicola and Cabri Giacomo}, doi = {10.1109/SASOW.2012.30}, isbn = {978-1-4673-5153-9}, year = {2012}, date = {2012-01-01}, booktitle = {Proceedings of the 2012 IEEE Sixth International Conference on Self-Adaptive and Self-Organizing Systems Workshops (SASOW)}, pages = {127--132}, publisher = {IEEE Computer Society, Conference Publishing Service}, address = {Los Alamitos, CA}, abstract = {Designing cooperative autonomic components and studying their possible interactions when operating in ensemble in open and non-deterministic environments, poses several challenges for the developer. While older studies focus on the several possibilities for optimized collaboration protocols and communication possibilities in distributed architectures, the most recent trend is more focussing on the self-expression and self-organizing features that aware components may deploy during run-time, thus exploring new frontiers in designing adaptive collaboration patterns. In this paper we propose a conceptual map for autonomic components (taking into account the distributed robotics scenario) able to classify old and new approaches in collaboration, highlighting similarities and commonalities between patterns and by focussing on the last studies about awareness of autonomous components. 
Being a preliminary study, the further goal is represented by finding a design approach for dynamically changing collaboration patterns during a run-time execution of tasks.}, keywords = {Autonomic computing, awareness}, pubstate = {published}, tppubtype = {inproceedings} } Designing cooperative autonomic components and studying their possible interactions when operating in ensemble in open and non-deterministic environments, poses several challenges for the developer. While older studies focus on the several possibilities for optimized collaboration protocols and communication possibilities in distributed architectures, the most recent trend is more focussing on the self-expression and self-organizing features that aware components may deploy during run-time, thus exploring new frontiers in designing adaptive collaboration patterns. In this paper we propose a conceptual map for autonomic components (taking into account the distributed robotics scenario) able to classify old and new approaches in collaboration, highlighting similarities and commonalities between patterns and by focussing on the last studies about awareness of autonomous components. Being a preliminary study, the further goal is represented by finding a design approach for dynamically changing collaboration patterns during a run-time execution of tasks. |
Prato,; Cavicchioli,; Zanni,; Boccacci,; Bertero, Efficient deconvolution methods for astronomical imaging: algorithms and IDL-GPU codes Journal Article ASTRONOMY & ASTROPHYSICS, 539 , pp. A133–A133, 2012. Abstract | Links | BibTeX | Tags: acceleration methods, GPU implementation, image deconvolution, Richardson-Lucy algorithm @article{prato_efficient_2012, title = {Efficient deconvolution methods for astronomical imaging: algorithms and IDL-GPU codes}, author = {M Prato and R Cavicchioli and L Zanni and P Boccacci and M Bertero}, doi = {10.1051/0004-6361/201118681}, year = {2012}, date = {2012-01-01}, journal = {ASTRONOMY & ASTROPHYSICS}, volume = {539}, pages = {A133--A133}, abstract = {Context. The Richardson-Lucy (RL) method is the most popular deconvolution method in Astronomy because it preserves the number of counts and the nonnegativity of the original object. Regularization is, in general, obtained by an early stopping of RL iterations; in the case of point-wise objects such as binaries or open star clusters, iterations can be pushed to convergence. However, it is well known that RL is not an efficient method: in most cases and, in particular, for low noise levels, acceptable solutions are obtained at the cost of hundreds or thousands of iterations. Therefore, several approaches for accelerating RL have been proposed. They are mainly based on the remark that RL is a scaled gradient method for the minimization of the Kullback-Leibler (KL) divergence, or Csiszar I-divergence, which represents the data-fidelity function in the case of Poisson noise. In this framework, a line search along the descent direction is considered for reducing the number of iterations.Aims. In a recent paper, a general optimization method, denoted as scaled gradient projection (SGP) method , has been proposed for the constrained minimization of continuously differentiable convex functions. It is applicable to the nonnegative minimization of the KL divergence. 
If the scaling suggested by RL is used in this method, then it provides a considerable speedup of RL. Therefore the aim of this paper is to apply SGP to a number of imaging problems in Astronomy such as single image deconvolution, multiple image deconvolution and boundary effect correction.Methods. Deconvolution methods are proposed by applying SGP to the minimization of the KL divergence for the imaging problems mentioned above and the corresponding algorithms are derived and implemented in IDL. For all the algorithms several stopping rules are introduced, including one based on a recently proposed discrepancy principle for Poisson data. For a further increase of efficiency, implementation on GPU (Graphic Processing Unit) is also considered.Results. The proposed algorithms are tested on simulated images. The speedup of SGP methods with respect to the corresponding RL methods strongly depends on the problem and on the specific object to be reconstructed, and in our simulations it ranges from about 4 to more than 30. Moreover, significant speedups up to two orders of magnitude have been observed between the serial and parallel implementations of the algorithms. The codes are available upon request.}, keywords = {acceleration methods, GPU implementation, image deconvolution, Richardson-Lucy algorithm}, pubstate = {published}, tppubtype = {article} } Context. The Richardson-Lucy (RL) method is the most popular deconvolution method in Astronomy because it preserves the number of counts and the nonnegativity of the original object. Regularization is, in general, obtained by an early stopping of RL iterations; in the case of point-wise objects such as binaries or open star clusters, iterations can be pushed to convergence. However, it is well known that RL is not an efficient method: in most cases and, in particular, for low noise levels, acceptable solutions are obtained at the cost of hundreds or thousands of iterations. 
Therefore, several approaches for accelerating RL have been proposed. They are mainly based on the remark that RL is a scaled gradient method for the minimization of the Kullback-Leibler (KL) divergence, or Csiszar I-divergence, which represents the data-fidelity function in the case of Poisson noise. In this framework, a line search along the descent direction is considered for reducing the number of iterations.Aims. In a recent paper, a general optimization method, denoted as scaled gradient projection (SGP) method, has been proposed for the constrained minimization of continuously differentiable convex functions. It is applicable to the nonnegative minimization of the KL divergence. If the scaling suggested by RL is used in this method, then it provides a considerable speedup of RL. Therefore the aim of this paper is to apply SGP to a number of imaging problems in Astronomy such as single image deconvolution, multiple image deconvolution and boundary effect correction.Methods. Deconvolution methods are proposed by applying SGP to the minimization of the KL divergence for the imaging problems mentioned above and the corresponding algorithms are derived and implemented in IDL. For all the algorithms several stopping rules are introduced, including one based on a recently proposed discrepancy principle for Poisson data. For a further increase of efficiency, implementation on GPU (Graphic Processing Unit) is also considered.Results. The proposed algorithms are tested on simulated images. The speedup of SGP methods with respect to the corresponding RL methods strongly depends on the problem and on the specific object to be reconstructed, and in our simulations it ranges from about 4 to more than 30. Moreover, significant speedups up to two orders of magnitude have been observed between the serial and parallel implementations of the algorithms. The codes are available upon request. |
Cavicchioli,; Prato,; Zanni,; Boccacci,; Bertero, Efficient multi-image deconvolution in astronomy Inproceedings Optimization Techniques for Inverse Problems II, 2012. Abstract | BibTeX | Tags: Astronomical imaging, constrained optimization, GPU implementation, images deconvolution @inproceedings{cavicchioli_efficient_2012, title = {Efficient multi-image deconvolution in astronomy}, author = {R Cavicchioli and M Prato and L Zanni and P Boccacci and M Bertero}, year = {2012}, date = {2012-01-01}, booktitle = {Optimization Techniques for Inverse Problems II}, abstract = {The deconvolution of astronomical images by the Richardson-Lucy method (RLM) is extended here to the problem of multiple image deconvolution and the reduction of boundary effects. We show the multiple image RLM in its accelerated gradient-version SGP (Scaled Gradient Projection). Numerical simulations indicate that the approach can provide excellent results with a considerable reduction of the boundary effects. Also exploiting GPUlib applied to the IDL code, we obtained a remarkable acceleration of up to two orders of magnitude.}, keywords = {Astronomical imaging, constrained optimization, GPU implementation, images deconvolution}, pubstate = {published}, tppubtype = {inproceedings} } The deconvolution of astronomical images by the Richardson-Lucy method (RLM) is extended here to the problem of multiple image deconvolution and the reduction of boundary effects. We show the multiple image RLM in its accelerated gradient-version SGP (Scaled Gradient Projection). Numerical simulations indicate that the approach can provide excellent results with a considerable reduction of the boundary effects. Also exploiting GPUlib applied to the IDL code, we obtained a remarkable acceleration of up to two orders of magnitude. |
Cavicchioli,; Prearo,; Zanella,; Zanghirati,; Zanni, Optimization methods for digital image restoration on MPP multicore architectures Incollection Recent Advances in Nonlinear Optimization and Equilibrium Problems: a Tribute to Marco D'Apuzzo, 27 , pp. 93–116, Aracne editrice S.r.l., Roma, 2012, ISBN: 978-88-548-5687-5. Abstract | BibTeX | Tags: Image restoration, Optimization methods, parallel computing @incollection{cavicchioli_optimization_2012, title = {Optimization methods for digital image restoration on MPP multicore architectures}, author = {R Cavicchioli and A Prearo and R Zanella and G Zanghirati and L Zanni}, isbn = {978-88-548-5687-5}, year = {2012}, date = {2012-01-01}, booktitle = {Recent Advances in Nonlinear Optimization and Equilibrium Problems: a Tribute to Marco D'Apuzzo}, volume = {27}, pages = {93--116}, publisher = {Aracne editrice S.r.l.}, address = {Roma}, abstract = {We consider the numerical solution on modern multicore architectures of large-scale optimization problems arising in image restoration. An efficient solution of these optimization problems is important in several areas, such as medical imaging, microscopy and astronomy, where large-scale imaging is a basic task. To face these challenging problems, a lot of effort has been put in designing effective algorithms, that have largely improved the classical optimization strategies usually applied in image processing. Nevertheless, in many large-scale applications also these improved algorithms do not provide the expected reconstruction in a reasonable time. In these cases, the modern multiprocessor architectures represent an important resource for reducing the reconstruction time. Actually, one can consider different possibilities for a parallel computational scenario. 
One is the use of Graphics Processing Units (GPUs): they were originally designed to perform many simple operations on matrices and vectors with high efficiency and low accuracy (single precision arithmetic), but they have recently seen a huge development of both computational power and accuracy (double precision arithmetic), while still retaining compactness and low price. Another possibility is the use of last-generation multi-core CPUs, where general-purpose, very powerful computational cores are integrated inside the same CPU and a bunch of CPUs can be hosted by the same motherboard, sharing a central memory: they can perform completely different and asynchronous tasks, as well as cooperate by suitably distributing the workload of a complex task. Additional opportunities are offered by the more classical clusters of nodes, usually connected in different distributed-memory topologies to form large-scale high-performance machines with tens to hundred-thousands of processors. Needless to say, various mix of these architectures (such as clusters of GPUs) are also possible and sold, indeed. It should be noticed, however, that all the mentioned scenarios can exist even in very small-sized and cheap configurations. This is particularly relevant for GPUs: initially targeted at 3D graphics applications, they have been employed in many other scientific computing areas, such as signal and image reconstruction. Recent applications show that in many cases GPU performances are comparable to those of a medium-sized cluster, at a fraction of its cost. Thus, also small laboratories, which cannot afford a cluster, can benefit from a substantial reduction of computing time compared to a standard CPU system. Nevertheless, for very large problems, as 3D imaging in confocal microscopy, the size of GPU's on-devices dedicated memory can become a limit to performance. 
For this reason, the ability to exploit the scalability of clusters by means of standard MPI implementations is still crucial for facing very large-scale applications. Here, we deal with both the GPU and the MPI implementation of an optimization algorithm, called Scaled Gradient Projection (SGP) method, that applies to several imaging problems. GPU versions of this method have been recently evaluated, while an MPI version is presented in this work in the cases of both deblurring and denoising problems. A computational study of the different implementations is reported, to show the enhancements provided by these parallel approaches in solving both 2D and 3D imaging problems.}, keywords = {Image restoration, Optimization methods, parallel computing}, pubstate = {published}, tppubtype = {incollection} } We consider the numerical solution on modern multicore architectures of large-scale optimization problems arising in image restoration. An efficient solution of these optimization problems is important in several areas, such as medical imaging, microscopy and astronomy, where large-scale imaging is a basic task. To face these challenging problems, a lot of effort has been put in designing effective algorithms, that have largely improved the classical optimization strategies usually applied in image processing. Nevertheless, in many large-scale applications also these improved algorithms do not provide the expected reconstruction in a reasonable time. In these cases, the modern multiprocessor architectures represent an important resource for reducing the reconstruction time. Actually, one can consider different possibilities for a parallel computational scenario. 
One is the use of Graphics Processing Units (GPUs): they were originally designed to perform many simple operations on matrices and vectors with high efficiency and low accuracy (single precision arithmetic), but they have recently seen a huge development of both computational power and accuracy (double precision arithmetic), while still retaining compactness and low price. Another possibility is the use of last-generation multi-core CPUs, where general-purpose, very powerful computational cores are integrated inside the same CPU and a bunch of CPUs can be hosted by the same motherboard, sharing a central memory: they can perform completely different and asynchronous tasks, as well as cooperate by suitably distributing the workload of a complex task. Additional opportunities are offered by the more classical clusters of nodes, usually connected in different distributed-memory topologies to form large-scale high-performance machines with tens to hundred-thousands of processors. Needless to say, various mix of these architectures (such as clusters of GPUs) are also possible and sold, indeed. It should be noticed, however, that all the mentioned scenarios can exist even in very small-sized and cheap configurations. This is particularly relevant for GPUs: initially targeted at 3D graphics applications, they have been employed in many other scientific computing areas, such as signal and image reconstruction. Recent applications show that in many cases GPU performances are comparable to those of a medium-sized cluster, at a fraction of its cost. Thus, also small laboratories, which cannot afford a cluster, can benefit from a substantial reduction of computing time compared to a standard CPU system. Nevertheless, for very large problems, as 3D imaging in confocal microscopy, the size of GPU's on-devices dedicated memory can become a limit to performance. 
For this reason, the ability to exploit the scalability of clusters by means of standard MPI implementations is still crucial for facing very large-scale applications. Here, we deal with both the GPU and the MPI implementation of an optimization algorithm, called Scaled Gradient Projection (SGP) method, that applies to several imaging problems. GPU versions of this method have been recently evaluated, while an MPI version is presented in this work in the cases of both deblurring and denoising problems. A computational study of the different implementations is reported, to show the enhancements provided by these parallel approaches in solving both 2D and 3D imaging problems. |
Burgio, Paolo; Marongiu, Andrea; Heller,; Chavet,; Coussy,; Benini, Luca OpenMP-based Synergistic Parallelization and HW Acceleration for On-Chip Shared-Memory Clusters Inproceedings 15th Euromicro Conference on Digital Systems Design, pp. 751–758, IEEE Press, USA, 2012, ISBN: 978-1-4673-2498-4. Abstract | Links | BibTeX | Tags: OpenMP-based Synergistic Parallelization and HW Acceleration for On-Chip Shared-Memory Clusters @inproceedings{burgio_openmp-based_2012, title = {OpenMP-based Synergistic Parallelization and HW Acceleration for On-Chip Shared-Memory Clusters}, author = {Paolo Burgio and Andrea Marongiu and D Heller and C Chavet and P Coussy and Luca Benini}, doi = {10.1109/DSD.2012.97}, isbn = {978-1-4673-2498-4}, year = {2012}, date = {2012-01-01}, booktitle = {15th Euromicro Conference on Digital Systems Design}, pages = {751--758}, publisher = {IEEE Press}, address = {USA}, abstract = {Modern embedded MPSoC designs increasingly couple hardware accelerators to processing cores to trade between energy efficiency and platform specialization. To assist effective design of such systems there is the need on one hand for clear methodologies to streamline accelerator definition and instantiation, on the other for architectural templates and runtime techniques that minimize processors-to-accelerator communication costs. In this paper we present an architecture featuring tightly-coupled processors and accelerators, with zerocopy communication. Efficient programming is supported by an extended OpenMP programming model, where custom directives allow to specialize code regions for execution on parallel cores, accelerators, or a mix of the two. 
Our integrated approach enables fast yet accurate exploration of accelerator-based HW and SW architectures.}, keywords = {OpenMP-based Synergistic Parallelization and HW Acceleration for On-Chip Shared-Memory Clusters}, pubstate = {published}, tppubtype = {inproceedings} } Modern embedded MPSoC designs increasingly couple hardware accelerators to processing cores to trade between energy efficiency and platform specialization. To assist effective design of such systems there is the need on one hand for clear methodologies to streamline accelerator definition and instantiation, on the other for architectural templates and runtime techniques that minimize processors-to-accelerator communication costs. In this paper we present an architecture featuring tightly-coupled processors and accelerators, with zerocopy communication. Efficient programming is supported by an extended OpenMP programming model, where custom directives allow to specialize code regions for execution on parallel cores, accelerators, or a mix of the two. Our integrated approach enables fast yet accurate exploration of accelerator-based HW and SW architectures. |
Marongiu, Andrea; Burgio, Paolo; Benini, Luca Fast and lightweight support for nested parallelism on cluster-based embedded many-cores Inproceedings Proceedings of Design, Automation & Test in Europe Conference & Exhibition (DATE), 2012, pp. 105–110, IEEE Press, USA, 2012, ISBN: 978-1-4577-2145-8. Abstract | Links | BibTeX | Tags: CLUSTERED ARCHITECTURES, MANY-CORE EMBEDDED SYSTEMS, NESTED PARALLELISM, OPENMP, Programming models, SHARED MEMORY EMBEDDED SYSTEMS, SYNCHRONIZATION @inproceedings{marongiu_fast_2012, title = {Fast and lightweight support for nested parallelism on cluster-based embedded many-cores}, author = {Andrea Marongiu and Paolo Burgio and Luca Benini}, doi = {10.1109/DATE.2012.6176441}, isbn = {978-1-4577-2145-8}, year = {2012}, date = {2012-01-01}, booktitle = {Proceedings of Design, Automation & Test in Europe Conference & Exhibition (DATE), 2012}, pages = {105--110}, publisher = {IEEE Press}, address = {USA}, abstract = {Several recent many-core accelerators have been architected as fabrics of tightly-coupled shared memory clusters. A hierarchical interconnection system is used – with a crossbarlike medium inside each cluster and a network-on-chip (NoC) at the global level – which make memory operations nonuniform (NUMA). Nested parallelism represents a powerful programming abstraction for these architectures, where a first level of parallelism can be used to distribute coarse-grained tasks to clusters, and additional levels of fine-grained parallelism can be distributed to processors within a cluster. This paper presents a lightweight and highly optimized support for nested parallelism on cluster-based embedded many-cores. 
We assess the costs to enable multi-level parallelization and demonstrate that our techniques allow to extract high degrees of parallelism.}, keywords = {CLUSTERED ARCHITECTURES, MANY-CORE EMBEDDED SYSTEMS, NESTED PARALLELISM, OPENMP, Programming models, SHARED MEMORY EMBEDDED SYSTEMS, SYNCHRONIZATION}, pubstate = {published}, tppubtype = {inproceedings} } Several recent many-core accelerators have been architected as fabrics of tightly-coupled shared memory clusters. A hierarchical interconnection system is used – with a crossbarlike medium inside each cluster and a network-on-chip (NoC) at the global level – which make memory operations nonuniform (NUMA). Nested parallelism represents a powerful programming abstraction for these architectures, where a first level of parallelism can be used to distribute coarse-grained tasks to clusters, and additional levels of fine-grained parallelism can be distributed to processors within a cluster. This paper presents a lightweight and highly optimized support for nested parallelism on cluster-based embedded many-cores. We assess the costs to enable multi-level parallelization and demonstrate that our techniques allow to extract high degrees of parallelism. |